In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, classification_report
import pickle

# Load dataset
df = pd.read_csv('/content/brain_stroke.csv')

# Drop missing values
df = df.dropna()

# Encode categorical columns
label_encoders = {}
for column in df.select_dtypes(include='object').columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

# Downsample the majority class
stroke_df = df[df['stroke'] == 1]
non_stroke_df = df[df['stroke'] == 0]
non_stroke_downsampled = non_stroke_df.sample(n=len(stroke_df), random_state=42)
df_balanced = pd.concat([stroke_df, non_stroke_downsampled]).sample(frac=1, random_state=42)

# Features and target
X = df_balanced.drop('stroke', axis=1)
y = df_balanced['stroke']

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# -------------------- LINEAR REGRESSION --------------------
print("\n===== Linear Regression =====")
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_test)
y_pred_lr_class = (y_pred_lr >= 0.5).astype(int)

print(classification_report(y_test, y_pred_lr_class))
print(f"MSE (Linear Regression): {mean_squared_error(y_test, y_pred_lr):.4f}")

with open('stroke_model.pkl', 'wb') as f:
    pickle.dump({'model': lr_model, 'encoders': label_encoders}, f)



print("\n model saved: 'stroke_model_linear.pkl'")



===== Linear Regression =====
              precision    recall  f1-score   support

           0       0.87      0.75      0.80        52
           1       0.76      0.87      0.81        47

    accuracy                           0.81        99
   macro avg       0.81      0.81      0.81        99
weighted avg       0.82      0.81      0.81        99

MSE (Linear Regression): 0.1571

 model saved: 'stroke_model_linear.pkl'


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[column] = le.fit_transform(df[column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[column] = le.fit_transform(df[column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[column] = le.fit_transform(df[column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .l