<a href="https://colab.research.google.com/github/Maryam512-math/My-Assignment-/blob/main/Untitled4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, f1_score

# Load and preprocess data
data = pd.read_csv("/content/drive/MyDrive/CVD_cleaned.csv")
data.dropna(axis=1, inplace=True)

# Encode target
le = LabelEncoder()
data['Heart_Disease'] = le.fit_transform(data['Heart_Disease'])

X = data.drop('Heart_Disease', axis=1)
y = data['Heart_Disease']

# Identify categorical and numerical columns
categorical_cols = X.select_dtypes(include='object').columns
numerical_cols = X.select_dtypes(include=np.number).columns

# Create preprocessing pipelines for numerical and categorical features
numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

# Create a column transformer to apply different transformations to different columns
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)])

# Create a pipeline that first preprocesses the data and then scales it
pipeline = Pipeline(steps=[('preprocessor', preprocessor)])


# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply preprocessing to training and testing data
X_train_processed = pipeline.fit_transform(X_train)
X_test_processed = pipeline.transform(X_test)

In [14]:
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(max_iter=1000)
lr.fit(X_train_processed, y_train)
y_pred_lr = lr.predict(X_test_processed)

print("Logistic Regression")
print("Accuracy:", accuracy_score(y_test, y_pred_lr))
print("F1 Score:", f1_score(y_test, y_pred_lr, average='weighted'))

Logistic Regression
Accuracy: 0.9192177559048744
F1 Score: 0.889279876451409


In [15]:
import joblib

# Save model
joblib.dump(lr, "logistic_regression_model.pkl")

['logistic_regression_model.pkl']

In [None]:
# Print the classification report
print(classification_report(y_test, y_pred))

In [16]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier()
rf.fit(X_train_processed, y_train)
y_pred_rf = rf.predict(X_test_processed)

print("Random Forest")
print("Accuracy:", accuracy_score(y_test, y_pred_rf))
print("F1 Score:", f1_score(y_test, y_pred_rf, average='weighted'))

Random Forest
Accuracy: 0.91835974810186
F1 Score: 0.8869654783246659


In [17]:
joblib.dump(rf, "random_forest_model.pkl")

['random_forest_model.pkl']

In [9]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier()
knn.fit(X_train_processed, y_train)
y_pred_knn = knn.predict(X_test_processed)

print("KNN")
print("Accuracy:", accuracy_score(y_test, y_pred_knn))
print("F1 Score:", f1_score(y_test, y_pred_knn, average='weighted'))

KNN
Accuracy: 0.9101843907335158
F1 Score: 0.8873443021475063


In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Prepare labels for Keras
num_classes = len(np.unique(y))
y_train_cat = to_categorical(y_train, num_classes)
y_test_cat = to_categorical(y_test, num_classes)

model_dl = Sequential([
    Dense(128, activation='relu', input_dim=X_train_processed.shape[1]),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax')
])

model_dl.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model_dl.fit(X_train_processed, y_train_cat, epochs=50, batch_size=16, verbose=1)

y_pred_dl = model_dl.predict(X_test_processed).argmax(axis=1)

print("Deep Learning")
print("Accuracy:", accuracy_score(y_test, y_pred_dl))
print("F1 Score:", f1_score(y_test, y_pred_dl, average='weighted'))

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m15443/15443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 1ms/step - accuracy: 0.9181 - loss: 0.2374
Epoch 2/50
[1m15443/15443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 1ms/step - accuracy: 0.9181 - loss: 0.2262
Epoch 3/50
[1m15443/15443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 1ms/step - accuracy: 0.9193 - loss: 0.2243
Epoch 4/50
[1m15443/15443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 1ms/step - accuracy: 0.9188 - loss: 0.2249
Epoch 5/50
[1m15443/15443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 1ms/step - accuracy: 0.9181 - loss: 0.2254
Epoch 6/50
[1m15443/15443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 1ms/step - accuracy: 0.9194 - loss: 0.2234
Epoch 7/50
[1m15443/15443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 1ms/step - accuracy: 0.9188 - loss: 0.2246
Epoch 8/50
[1m15443/15443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 1ms/step - accuracy: 0.9191 - loss: 0.2238
Epoch 9/50


In [12]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.47.0-py3-none-any.whl.metadata (9.0 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.47.0-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m94.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m129.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hIns

In [18]:
import streamlit as st
import pandas as pd
import numpy as np
import pickle
import os

# Load trained model and label encoder
@st.cache_resource
def load_model():
    # Get the current working directory
    current_dir = os.getcwd()
    model_path = os.path.join(current_dir, "random_forest_model.pkl")
    encoder_path = os.path.join(current_dir, "label_encoder.pkl")
    symptoms_path = os.path.join(current_dir, "symptom_list.pkl")

    try:
        with open(model_path, "rb") as f:
            model = pickle.load(f)
        with open(encoder_path, "rb") as f:
            encoder = pickle.load(f)
        with open(symptoms_path, "rb") as f:
            symptoms = pickle.load(f)
        return model, encoder, symptoms
    except FileNotFoundError as e:
        st.error(f"Error loading model files: {e}. Make sure you have run the cells to save the model and encoder.")
        return None, None, None


model, label_encoder, all_symptoms = load_model()

if model and label_encoder and all_symptoms:
    # Title
    st.title("Disease Prediction from Symptoms")
    st.write("Input the presence (1) or absence (0) of symptoms below:")

    # User input
    user_input = {}
    for symptom in all_symptoms:
        user_input[symptom] = st.selectbox(f"{symptom.replace('_',' ').capitalize()}", [0, 1], key=symptom)

    # Prediction
    if st.button("Predict Disease"):
        input_df = pd.DataFrame([user_input])
        prediction = model.predict(input_df)[0]
        disease_name = label_encoder.inverse_transform([prediction])[0]
        st.success(f"🩺 Predicted Disease: **{disease_name}**")
else:
    st.warning("Model files not loaded. Please run the cells to save the model and encoder, then rerun this cell.")



In [21]:
%%writefile app.py
import joblib
import streamlit as st
import pandas as pd

# Load the saved model
model = joblib.load("logistic_regression_model.pkl")

# Create a dictionary with example feature values (replace with actual values)
# The keys should match the original column names of your data (excluding the target)
example_input = {
    'General_Health': 'Very Good',
    'Checkup': 'Within the past year',
    'Exercise': 'Yes',
    'Skin_Cancer': 'No',
    'Other_Cancer': 'No',
    'Depression': 'No',
    'Diabetes': 'No',
    'Arthritis': 'No',
    'Sex': 'Female',
    'Age_Category': '55-59',
    'Height_(cm)': 163.0,
    'Weight_(kg)': 75.0,
    'BMI': 28.37,
    'Smoking_History': 'Yes',
    'Alcohol_Consumption': 0.0,
    'Fruit_Consumption': 30.0,
    'Green_Vegetables_Consumption': 16.0,
    'FriedPotato_Consumption': 12.0
}


# Convert the example input to a DataFrame
input_df = pd.DataFrame([example_input])

# Preprocess the input data using the same pipeline used for training
# Make sure the 'pipeline' variable is defined in a previous cell and has been fitted on the training data
try:
    input_processed = pipeline.transform(input_df)

    # Make a prediction
    prediction = model.predict(input_processed)[0]

    # Assuming you have the label_encoder from the first cell
    # If not, you might need to load it or recreate it
    # For demonstration, let's assume 0 and 1 are the encoded classes
    predicted_class = "No Heart Disease" if prediction == 0 else "Heart Disease"

    st.write("Prediction:", predicted_class)

except NameError:
    st.error("The 'pipeline' object is not defined. Please run the first cell to define and fit the preprocessing pipeline.")
except Exception as e:
    st.error(f"An error occurred during prediction: {e}")

Overwriting app.py


In [1]:
!pip install streamlit pyngrok --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m51.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m66.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
!ngrok config add-authtoken 2zuJuuww7wEH0TCZr10f8Eo7RBS_2CWSdiVDVie8PCSgnTBA6

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
from pyngrok import ngrok

# Set up tunnel
public_url = ngrok.connect(8501)
print(f"Streamlit app is live at: {public_url}")

# Run streamlit
!streamlit run app.py &

Streamlit app is live at: NgrokTunnel: "https://968e0a9fea52.ngrok-free.app" -> "http://localhost:8501"

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.46.115.249:8501[0m
[0m


In [20]:
import joblib
import streamlit as st
import pandas as pd

# Load the saved model
model = joblib.load("logistic_regression_model.pkl")

# Create a dictionary with example feature values (replace with actual values)
# The keys should match the original column names of your data (excluding the target)
example_input = {
    'General_Health': 'Very Good',
    'Checkup': 'Within the past year',
    'Exercise': 'Yes',
    'Skin_Cancer': 'No',
    'Other_Cancer': 'No',
    'Depression': 'No',
    'Diabetes': 'No',
    'Arthritis': 'No',
    'Sex': 'Female',
    'Age_Category': '55-59',
    'Height_(cm)': 163.0,
    'Weight_(kg)': 75.0,
    'BMI': 28.37,
    'Smoking_History': 'Yes',
    'Alcohol_Consumption': 0.0,
    'Fruit_Consumption': 30.0,
    'Green_Vegetables_Consumption': 16.0,
    'FriedPotato_Consumption': 12.0
}


# Convert the example input to a DataFrame
input_df = pd.DataFrame([example_input])

# Preprocess the input data using the same pipeline used for training
# Make sure the 'pipeline' variable is defined in a previous cell and has been fitted on the training data
try:
    input_processed = pipeline.transform(input_df)

    # Make a prediction
    prediction = model.predict(input_processed)[0]

    # Assuming you have the label_encoder from the first cell
    # If not, you might need to load it or recreate it
    # For demonstration, let's assume 0 and 1 are the encoded classes
    predicted_class = "No Heart Disease" if prediction == 0 else "Heart Disease"

    st.write("Prediction:", predicted_class)

except NameError:
    st.error("The 'pipeline' object is not defined. Please run the first cell to define and fit the preprocessing pipeline.")
except Exception as e:
    st.error(f"An error occurred during prediction: {e}")



In [None]:
joblib.dump(scaler, "scaler.pkl")

# In Streamlit
scaler = joblib.load("scaler.pkl")
scaled_input = scaler.transform([user_input])
