In [6]:
# LOGISTIC REGRESSION
# 1 Data Exploration:
# a. Load the dataset and perform exploratory data analysis (EDA).
#b. Examine the features, their types, and summary statistics.
#c. Create visualizations such as histograms, box plots, or pair plots to visualize the distributions and relationships between features.
#Analyze any patterns or correlations observed in the data.
#2 Data Preprocessing:
#a. Handle missing values (e.g., imputation).
#b. Encode categorical variables.
#3 Model Building:
#a. Build a logistic regression model using appropriate libraries (e.g., scikit-learn).
#b. Train the model using the training data.
#4 Model Evaluation:
#a. Evaluate the performance of the model on the testing data using accuracy, precision, recall, F1-score, and ROC-AUC score.
#Visualize the ROC curve.
#5 Interpretation:
#a. Interpret the coefficients of the logistic regression model.
#b. Discuss the significance of features in predicting the target variable (survival probability in this case).
#6 Deployment with Streamlit:
#In this task, you will deploy your logistic regression model using Streamlit. The deployment can be done locally or online via Streamlit Share. Your task includes creating a Streamlit app in Python that involves loading your trained model and setting up user inputs for predictions. 

#(optional)For online deployment, use Streamlit Community Cloud, which supports deployment from GitHub repositories. 
#Detailed deployment instructions are available in the Streamlit Documentation.


In [28]:
# Importing Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve
import pickle
import streamlit as st

In [29]:
# Load the dataset
train_data=pd.read_csv("E:\Assignment\Logistic Regression\Titanic_train.csv")
data

Unnamed: 0,PassengerId,Pclass,Name,Age,SibSp,Parch,Ticket,Fare,Sex_male,Embarked_Q,Embarked_S
0,892,3,"Kelly, Mr. James",34.5,0,0,330911,7.8292,1,1,0
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",47.0,1,0,363272,7.0000,0,0,1
2,894,2,"Myles, Mr. Thomas Francis",62.0,0,0,240276,9.6875,1,1,0
3,895,3,"Wirz, Mr. Albert",27.0,0,0,315154,8.6625,1,0,1
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",22.0,1,1,3101298,12.2875,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
413,1305,3,"Spector, Mr. Woolf",27.0,0,0,A.5. 3236,8.0500,1,0,1
414,1306,1,"Oliva y Ocana, Dona. Fermina",39.0,0,0,PC 17758,108.9000,0,0,0
415,1307,3,"Saether, Mr. Simon Sivertsen",38.5,0,0,SOTON/O.Q. 3101262,7.2500,1,0,1
416,1308,3,"Ware, Mr. Frederick",27.0,0,0,359309,8.0500,1,0,1


In [30]:
# Preprocess the training data
train_data['Age'].fillna(train_data['Age'].median(), inplace=True)
train_data['Fare'].fillna(train_data['Fare'].median(), inplace=True)
train_data['Embarked'].fillna(train_data['Embarked'].mode()[0], inplace=True)
train_data = pd.get_dummies(train_data, columns=['Sex', 'Embarked'], drop_first=True)

In [31]:
# Define the target variable and features
X_train = train_data.drop(['PassengerId', 'Name', 'Ticket', 'Cabin', 'Survived'], axis=1)
y_train = train_data['Survived']

In [32]:
# Train the logistic regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

In [33]:
# Save the trained model
with open('logistic_regression_model.pkl', 'wb') as f:
    pickle.dump(model, f)

In [34]:
# Load the test dataset (the provided file without 'Survived' column)
test_data = pd.read_csv('E:\Assignment\Logistic Regression\Titanic_test.csv')

In [35]:
# Preprocess the test data
test_data['Age'].fillna(test_data['Age'].median(), inplace=True)
test_data['Fare'].fillna(test_data['Fare'].median(), inplace=True)
test_data['Embarked'].fillna(test_data['Embarked'].mode()[0], inplace=True)
test_data = pd.get_dummies(test_data, columns=['Sex', 'Embarked'], drop_first=True)

In [36]:
# Ensure that the test dataset has the same features as the training dataset
missing_cols = set(X_train.columns) - set(test_data.columns)
for col in missing_cols:
    test_data[col] = 0
test_data = test_data[X_train.columns]

In [37]:
# Load the trained model and make predictions
with open('logistic_regression_model.pkl', 'rb') as f:
    model = pickle.load(f)

predictions = model.predict(test_data)

In [38]:
import streamlit as st
import pandas as pd
import pickle

# Load the trained model
model = pickle.load(open('logistic_regression_model.pkl', 'rb'))

# Define the user input function
def user_input_features():
    Pclass = st.selectbox('Pclass', [1, 2, 3])
    Age = st.slider('Age', 0, 80, 30)
    SibSp = st.slider('SibSp', 0, 8, 0)
    Parch = st.slider('Parch', 0, 6, 0)
    Fare = st.slider('Fare', 0, 500, 35)
    Sex_male = st.selectbox('Sex', ['male', 'female']) == 'male'
    Embarked_Q = st.selectbox('Embarked', ['Q', 'S', 'C']) == 'Q'
    Embarked_S = st.selectbox('Embarked', ['S', 'Q', 'C']) == 'S'
    
    data = {
        'Pclass': Pclass,
        'Age': Age,
        'SibSp': SibSp,
        'Parch': Parch,
        'Fare': Fare,
        'Sex_male': Sex_male,
        'Embarked_Q': Embarked_Q,
        'Embarked_S': Embarked_S
    }
    features = pd.DataFrame(data, index=[0])
    return features

input_df = user_input_features()

# Make predictions
prediction = model.predict(input_df)
prediction_proba = model.predict_proba(input_df)

st.subheader('Prediction')
survival = 'Survived' if prediction[0] else 'Not Survived'
st.write(survival)

st.subheader('Prediction Probability')
st.write(prediction_proba)

2024-07-20 13:05:00.826 
  command:

    streamlit run C:\Users\sbc\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
