In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the 'documentation' sheet of the workbook: the first three rows are
# skipped and only Excel columns A to B are kept. Then preview nine rows.
documentation = pd.read_excel(
    'diabetes.xlsx',
    sheet_name='documentation',
    skiprows=3,
    usecols="A:b",
)
documentation.head(9)

In [None]:
# Read the 'data' sheet of the workbook: the first two rows are skipped and
# only Excel columns A to I are kept. Then preview the first rows.
df = pd.read_excel(
    'diabetes.xlsx',
    sheet_name='data',
    skiprows=2,
    usecols="A:I",
)
df.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the freshly
# loaded data.
df.describe()

In [None]:
# Columns in which a value of 0 is treated as "no measurement available".
columns_to_replace = [
    'gtt',
    'blood_pressure',
    'triceps_skin_fold_thickness',
    'insulin',
    'bmi',
    'dpf',
    'age',
]

# Turn every 0 in those columns into NaN, then show the summary statistics
# again so the effect of the replacement is visible.
df[columns_to_replace] = df[columns_to_replace].replace(to_replace=0, value=np.nan)
df.describe()

In [None]:
def plot_distribution(df, bins=20):
    """Show a histogram and a boxplot side by side for every column of ``df``.

    One figure with two axes is created, displayed and closed per column.
    Missing values (NaN) are dropped before plotting; a column that contains
    no values at all is skipped instead of raising.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to visualise; every column is plotted.
    bins : int, sequence or str, default 20
        Bin specification forwarded to ``Axes.hist``. The previous
        implementation used one bin per observation, which made almost every
        bar 0 or 1 high and hid the shape of the distribution.

    Returns
    -------
    None
        The figures are shown via ``plt.show()``.
    """
    # Iterate over the columns of the DataFrame
    for column in df.columns:
        values = df[column].dropna()
        if values.empty:
            # Nothing to draw for a column without any valid observation.
            continue

        # Create a new figure with two axes (histogram, boxplot) per column
        fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))

        # Histogram
        ax[0].hist(values, bins=bins, edgecolor='black')
        ax[0].set_title(f"Histogramm: {column}")
        ax[0].set_xlabel(column)
        ax[0].set_ylabel("Häufigkeit")

        # Boxplot
        sns.boxplot(data=values, ax=ax[1])
        ax[1].set_title(f"Boxplot: {column}")
        ax[1].set_ylabel(column)

        # Show the figure, then release it so figures do not pile up in
        # memory when the DataFrame has many columns.
        plt.tight_layout()
        plt.show()
        plt.close(fig)

In [None]:
# Draw the per-column histogram/boxplot figures for the loaded data.
plot_distribution(df)

In [None]:
def plot_histograms_by_class(df, class_column, bins=20):
    """Overlay one histogram per class for every non-class column of ``df``.

    For each column other than ``class_column`` a figure is created in which
    the values of every class found in ``class_column`` are drawn as a
    semi-transparent histogram. All classes of a numeric column share the
    same bin edges, so bars of different classes cover the same intervals and
    can be compared directly. Missing values (NaN) are ignored, and a column
    without any valid value is skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to visualise.
    class_column : str
        Name of the column holding the class label. Its distinct non-null
        values define the groups; for the labels 0 and 1 the legend reads
        "Class 0" / "Class 1".
    bins : int, sequence or str, default 20
        Bin specification. For numeric columns it is turned into common bin
        edges with ``numpy.histogram_bin_edges``; otherwise it is forwarded
        to ``Axes.hist`` unchanged.

    Returns
    -------
    None
        The figures are shown via ``plt.show()``.
    """
    # Sorted so that colour assignment and legend order are stable; NaN is
    # not treated as a class of its own.
    unique_classes = sorted(df[class_column].dropna().unique())

    # Iterate over the columns of the DataFrame
    for column in df.columns:
        if column == class_column:
            continue

        values = df[column].dropna()
        if values.empty:
            # Nothing to draw for a column without any valid observation.
            continue

        # Derive the bin edges from ALL values of the column, not per class:
        # per-class binning yields different bar widths and positions, which
        # makes the overlaid histograms misleading.
        if pd.api.types.is_numeric_dtype(values):
            bin_edges = np.histogram_bin_edges(values.to_numpy(dtype=float), bins=bins)
        else:
            bin_edges = bins

        # Create a new figure and axes object
        fig, ax = plt.subplots(figsize=(8, 4))

        # One semi-transparent histogram per class
        for class_value in unique_classes:
            class_values = df.loc[df[class_column] == class_value, column].dropna()
            ax.hist(
                class_values,
                bins=bin_edges,
                edgecolor='black',
                alpha=0.5,
                label=f"Class {class_value}",
            )

        ax.set_title(f"Histogramm: {column}")
        ax.set_xlabel(column)
        ax.set_ylabel("Häufigkeit")
        ax.legend()

        # Show the figure, then release it so figures do not pile up in
        # memory when the DataFrame has many columns.
        plt.tight_layout()
        plt.show()
        plt.close(fig)

In [None]:
# Compare the distribution of every feature between the groups defined by
# the "class" column.
plot_histograms_by_class(df, "class")