In [3]:
# Install Required Libraries
!pip install flask xgboost joblib pandas scikit-learn --quiet

# Upload Dataset Manually in Google Colab
from google.colab import files
import pandas as pd
import io

uploaded = files.upload()  # Upload CSV manually

# Load CSV into DataFrame
csv_filename = list(uploaded.keys())[0]
df = pd.read_csv(io.BytesIO(uploaded[csv_filename]))

# Data Preprocessing
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Fill missing BMI values with median
df['bmi'].fillna(df['bmi'].median(), inplace=True)

# Encode categorical features
label_cols = ['gender', 'ever_married', 'work_type', 'Residence_type', 'smoking_status']
le = LabelEncoder()
for col in label_cols:
    df[col] = le.fit_transform(df[col])

# Scale numerical features
scaler = StandardScaler()
df[['age', 'avg_glucose_level', 'bmi']] = scaler.fit_transform(df[['age', 'avg_glucose_level', 'bmi']])

# Calculate stroke percentage
stroke_percentage = (df['stroke'].sum() / len(df)) * 100
print(f'Stroke percentage: {stroke_percentage:.2f}%')

# Split data into features (X) and target (y)
X = df.drop(columns=['stroke'])
y = df['stroke']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Train the XGBoost Model
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report
import joblib

model = xgb.XGBClassifier(n_estimators=300, learning_rate=0.05, max_depth=6, random_state=42)
model.fit(X_train, y_train)

# Model Evaluation
y_pred = model.predict(X_test)
print(f"✅ Model Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("Classification Report:\n", classification_report(y_test, y_pred))

# Save the model
joblib.dump(model, "brain_stroke_model.pkl")

# 🚀 Step 5: Deploy Using Flask API
from flask import Flask, request, jsonify

app = Flask(__name__)

# Load trained model
model = joblib.load("brain_stroke_model.pkl")

@app.route('/predict', methods=['POST'])
def predict():
    try:
        data = request.get_json()
        features = data['features']  # Expecting a list of features
        prediction = model.predict([features])[0]
        return jsonify({"stroke_prediction": int(prediction)})
    except Exception as e:
        return jsonify({"error": str(e)})

if __name__ == '__main__':
    print("🚀 Running Flask Server...")
    app.run(host='0.0.0.0', port=5000, debug=False)


Saving brain_stroke.csv to brain_stroke (2).csv
Stroke percentage: 4.98%


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['bmi'].fillna(df['bmi'].median(), inplace=True)


✅ Model Accuracy: 0.9509
Classification Report:
               precision    recall  f1-score   support

           0       0.95      1.00      0.97       947
           1       0.67      0.04      0.08        50

    accuracy                           0.95       997
   macro avg       0.81      0.52      0.53       997
weighted avg       0.94      0.95      0.93       997

🚀 Running Flask Server...
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
