In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_squared_error, r2_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

In [None]:
# Fetch scikit-learn's bundled diabetes dataset as pandas objects and keep
# the combined frame for the analysis below.
diabetes = load_diabetes(as_frame=True)
df = diabetes["frame"]

In [None]:
# The 'target' column is treated as the outcome; every other column of df
# is used as a predictor.
y = df['target']
X = df.drop('target', axis=1)

In [None]:
# Univariate Analysis
def univariate_analysis(data, max_frequency_items=None):
    """Compute per-column summary statistics for the numeric columns of a DataFrame.

    Columns are selected with ``DataFrame.select_dtypes(include="number")``, so
    numeric columns of any width (e.g. int32, float32) are analysed, not only
    int64/float64. Non-numeric columns are skipped.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose numeric columns should be summarised.
    max_frequency_items : int or None, optional
        If given, keep only the ``max_frequency_items`` most frequent values in
        the 'Frequency' table. The default (None) keeps every distinct value,
        which can be very large for continuous columns.

    Returns
    -------
    dict
        Maps each numeric column name to a dict with the keys 'Mean',
        'Median', 'Mode', 'Variance', 'Standard Deviation', 'Skewness',
        'Kurtosis' and 'Frequency' (a ``{value: count}`` dict).
    """
    analysis = {}
    for column, series in data.select_dtypes(include="number").items():
        # mode() returns an empty Series when the column has no non-null
        # values; fall back to NaN instead of raising on the lookup.
        modes = series.mode()
        mode_value = modes.iloc[0] if not modes.empty else float('nan')

        # value_counts() is sorted by descending count, so head() keeps the
        # most frequent values when a cap is requested.
        counts = series.value_counts()
        if max_frequency_items is not None:
            counts = counts.head(max_frequency_items)

        analysis[column] = {
            'Mean': series.mean(),
            'Median': series.median(),
            'Mode': mode_value,
            'Variance': series.var(),
            'Standard Deviation': series.std(),
            'Skewness': series.skew(),
            'Kurtosis': series.kurtosis(),
            'Frequency': counts.to_dict(),
        }
    return analysis

# Summarise the columns of df; the result maps column name -> dict of statistics.
univariate_stats = univariate_analysis(df)

In [None]:
# Print one block per feature: a header line, one indented line per
# statistic, then a blank separator.
for feature, stats in univariate_stats.items():
    report_lines = [f"Feature: {feature}"]
    report_lines.extend(f"  {stat_name}: {value}" for stat_name, value in stats.items())
    print("\n".join(report_lines))
    print("\n")

Feature: age
  Mean: -2.511816797794472e-19
  Median: 0.005383060374248237
  Mode: 0.016280675727306498
  Variance: 0.002267573696145127
  Standard Deviation: 0.047619047619047644
  Skewness: -0.2313815329708844
  Kurtosis: -0.6712236886196017
  Frequency: {0.016280675727306498: 19, 0.04170844488444244: 17, 0.009015598825267658: 16, -0.027309785684926546: 15, -0.0018820165277906047: 14, -0.052737554842062495: 14, 0.04534098333546186: 14, 0.012648137276287077: 14, 0.06713621404157838: 13, 0.005383060374248237: 13, -0.005514554978810025: 12, 0.03081082953138418: 12, 0.001750521923228816: 12, 0.02354575262934534: 12, 0.04897352178648128: 11, 0.01991321417832592: 11, -0.04183993948900423: 11, -0.009147093429829445: 11, -0.02367724723390713: 10, 0.038075906433423026: 10, 0.0344433679824036: 9, -0.045472477940023646: 9, 0.027178291080364757: 9, -0.074532785548179: 8, 0.0707687524925978: 7, -0.016412170331868287: 7, -0.020044708782887707: 7, 0.059871137139539544: 7, -0.06000263174410134: 7, -

In [None]:
# Bivariate Analysis - Linear Regression
# Hold out 30% of the rows for evaluation (fixed seed for a repeatable split),
# fit ordinary least squares on the rest and predict the held-out targets.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
lin_reg = LinearRegression().fit(X_train, y_train)
y_pred = lin_reg.predict(X_test)

In [None]:
# Report held-out error and goodness of fit for the linear model
regression_mse = mean_squared_error(y_test, y_pred)
regression_r2 = r2_score(y_test, y_pred)

print("Linear Regression Results:")
print("Mean Squared Error:", regression_mse)
print("R^2 Score:", regression_r2)

Linear Regression Results:
Mean Squared Error: 2821.7509810013094
R^2 Score: 0.47728971643226203


In [None]:
# Logistic Regression (demonstration only: the target is continuous, so it is
# first turned into a 0/1 label marking values above the median).
# NOTE: this rebinds X_train, X_test, y_train and y_test, replacing the
# regression split made earlier; re-running the regression metrics after this
# cell would score y_pred against the binary labels.
y_binary = y.gt(y.median()).astype(int)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_binary,
    test_size=0.3,
    random_state=42,
)
log_reg = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred_log = log_reg.predict(X_test)

In [None]:
# Show the confusion matrix and per-class precision/recall for the classifier
logistic_confusion = confusion_matrix(y_test, y_pred_log)
logistic_report = classification_report(y_test, y_pred_log)

print("\nLogistic Regression Results:")
print(logistic_confusion)
print(logistic_report)


Logistic Regression Results:
[[56 16]
 [10 51]]
              precision    recall  f1-score   support

           0       0.85      0.78      0.81        72
           1       0.76      0.84      0.80        61

    accuracy                           0.80       133
   macro avg       0.80      0.81      0.80       133
weighted avg       0.81      0.80      0.80       133



In [None]:
# Multiple Regression Analysis
# Tabulate the fitted linear-regression weights, one row per feature column.
coefficients = pd.DataFrame(
    {'Coefficient': lin_reg.coef_},
    index=X.columns,
)
print("\nLinear Regression Coefficients:")
print(coefficients)


Linear Regression Coefficients:
     Coefficient
age    29.254013
sex  -261.706469
bmi   546.299723
bp    388.398341
s1   -901.959668
s2    506.763241
s3    121.154351
s4    288.035267
s5    659.268951
s6     41.376701
