In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read CVD_cleaned.csv (path is relative to the current working directory)
# into the DataFrame used by every later cell.
data = pd.read_csv(filepath_or_buffer="CVD_cleaned.csv")

In [None]:
def df_info(df: pd.DataFrame) -> None:
    """Print a quick overview of a DataFrame: shape, column summary, first rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    # A bare ``df.shape`` expression inside a function is evaluated and then
    # discarded, so the shape must be printed explicitly to be visible.
    print(df.shape)
    # info() writes its own report to stdout and returns None.
    df.info()
    print(df.head())

In [None]:
# Print the shape/summary/head overview of the loaded dataset.
df_info(df=data)

In [None]:
# Plot kinds that graph_plot knows how to draw.
_PLOT_TYPES = ("hist", "scatter", "count", "box", "bar")


def _is_categorical_like(dtype):
    """Return True when ``dtype`` is a pandas categorical, object or string dtype."""
    return (
        isinstance(dtype, pd.CategoricalDtype)
        or pd.api.types.is_object_dtype(dtype)
        or pd.api.types.is_string_dtype(dtype)
    )


def _infer_plot_type(x_type, y_type):
    """Choose a plot kind from column dtypes (``y_type`` is None for one variable)."""
    x_numeric = pd.api.types.is_numeric_dtype(x_type)

    if y_type is None:
        # Single variable: histogram for numbers, count plot for everything else.
        return "hist" if x_numeric else "count"

    y_numeric = pd.api.types.is_numeric_dtype(y_type)
    if x_numeric and y_numeric:
        return "scatter"
    if _is_categorical_like(x_type):
        return "box" if y_numeric else "bar"
    if x_numeric and _is_categorical_like(y_type):
        # Mirror image of the categorical-x / numeric-y case.
        return "box"

    raise ValueError(
        f"plot_type 'auto' ne gère pas la combinaison de types {x_type} / {y_type}; "
        "précisez plot_type explicitement"
    )


def graph_plot(df, x, y=None, plot_type="auto"):
    """Draw one seaborn figure for column ``x``, optionally against column ``y``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data source.
    x : hashable
        Label of the column plotted on the x axis.
    y : hashable, optional
        Label of a second column. ``None`` (the default) means a
        single-variable plot.
    plot_type : str, default "auto"
        One of ``"hist"``, ``"scatter"``, ``"count"``, ``"box"``, ``"bar"``,
        or ``"auto"`` to pick a kind from the column dtypes:

        * ``x`` only: ``"hist"`` if numeric, otherwise ``"count"``;
        * numeric vs numeric: ``"scatter"``;
        * categorical/object/string ``x`` with numeric ``y``: ``"box"``;
        * categorical/object/string ``x`` with non-numeric ``y``: ``"bar"``
          (grouped counts of each ``(x, y)`` combination);
        * numeric ``x`` with categorical/object/string ``y``: ``"box"``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.

    Raises
    ------
    ValueError
        If ``plot_type`` is not a known kind, or if ``"auto"`` cannot choose
        a kind for the given dtypes. Both checks happen before a figure is
        created, so a rejected call does not leave an empty figure open.
    KeyError
        If ``x`` or ``y`` is not a column of ``df``.
    """
    x_type = df[x].dtype
    # Compare against None rather than truthiness: a column label such as 0
    # is falsy but still names a real column.
    y_type = df[y].dtype if y is not None else None

    if plot_type == "auto":
        plot_type = _infer_plot_type(x_type, y_type)
    elif plot_type not in _PLOT_TYPES:
        raise ValueError("plot_type doit être 'hist', 'scatter', 'count', 'box' ou 'bar'")

    plt.figure(figsize=(10, 6))

    if plot_type == "hist":
        sns.histplot(df[x], kde=True)
        title, y_label = f"Distribution de {x}", "Fréquence"
    elif plot_type == "scatter":
        sns.scatterplot(data=df, x=x, y=y, alpha=0.6)
        title, y_label = f"{x} vs {y}", y
    elif plot_type == "count":
        sns.countplot(data=df, x=x)
        title, y_label = f"Répartition de {x}", "Nombre"
    elif plot_type == "box":
        sns.boxplot(data=df, x=x, y=y)
        title, y_label = f"{x} vs {y}", y
    else:  # "bar" is the only kind left after validation
        # Count rows per (x, y) combination and draw them as grouped bars.
        contingency = df.groupby([x, y]).size().reset_index(name="count")
        sns.barplot(data=contingency, x=x, y="count", hue=y)
        title, y_label = f"{x} vs {y}", "Nombre"

    plt.title(title)
    plt.xlabel(x)
    plt.ylabel(y_label)
    plt.tight_layout()
    plt.show()

In [None]:
# Draw one single-variable figure per listed column; graph_plot is called with
# its default plot_type ("auto"), so it picks the plot kind from each dtype.
univariate_columns = [
    "General_Health",
    "Checkup",
    "Exercise",
    "Heart_Disease",
    "Skin_Cancer",
    "Other_Cancer",
    "Depression",
    "Diabetes",
    "Arthritis",
    "Sex",
    "Age_Category",
    "Height_(cm)",
    "Weight_(kg)",
    "BMI",
    "Smoking_History",
    "Alcohol_Consumption",
    "Fruit_Consumption",
    "Green_Vegetables_Consumption",
    "FriedPotato_Consumption",
]
for column in univariate_columns:
    graph_plot(data, column)

In [None]:
# Two-variable figures involving Heart_Disease. Each tuple is (x, y): the
# first group puts Heart_Disease on y, the second group puts it on x.
heart_disease_pairs = [
    ("General_Health", "Heart_Disease"),
    ("Checkup", "Heart_Disease"),
    ("Exercise", "Heart_Disease"),
    ("Skin_Cancer", "Heart_Disease"),
    ("Other_Cancer", "Heart_Disease"),
    ("Depression", "Heart_Disease"),
    ("Diabetes", "Heart_Disease"),
    ("Arthritis", "Heart_Disease"),
    ("Age_Category", "Heart_Disease"),
    ("Heart_Disease", "BMI"),
    ("Heart_Disease", "Smoking_History"),
    ("Heart_Disease", "Alcohol_Consumption"),
    ("Heart_Disease", "Fruit_Consumption"),
    ("Heart_Disease", "Green_Vegetables_Consumption"),
    ("Heart_Disease", "FriedPotato_Consumption"),
]
for x_column, y_column in heart_disease_pairs:
    graph_plot(data, x_column, y_column)