**Mount Drive**

# **Mount Drive**

In [18]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [19]:
cd /content/drive/MyDrive/Workshop698/LabTest

/content/drive/MyDrive/Workshop698/LabTest


In [20]:
ls

app_penguins_rf_model.py            le_island.pkl           penguins_rf_model.pkl
best_logistic_regression_model.pkl  le_sex.pkl              penguins_size.csv
label_encoders.pkl                  penguins_labtest.ipynb  requirements.txt


# **Create Model**

In [21]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
import pickle

# Load the dataset
data = pd.read_csv('penguins_size.csv')

# Step 1: Data Cleansing

# Check for missing values
print(data.isnull().sum())

# Impute missing values for numeric columns using the mean
numeric_columns = ['culmen_length_mm', 'culmen_depth_mm', 'flipper_length_mm', 'body_mass_g']
imputer = SimpleImputer(strategy='mean')
data[numeric_columns] = imputer.fit_transform(data[numeric_columns])

# Impute missing values for categorical columns using the most frequent value
categorical_columns = ['species', 'island', 'sex']
imputer_cat = SimpleImputer(strategy='most_frequent')
data[categorical_columns] = imputer_cat.fit_transform(data[categorical_columns])

# Step 2: Encoding categorical variables
label_encoder = LabelEncoder()

# Encode 'species' as target variable (y)
data['species'] = label_encoder.fit_transform(data['species'])

# Encode categorical features: 'island' and 'sex'
data['island'] = label_encoder.fit_transform(data['island'])
data['sex'] = label_encoder.fit_transform(data['sex'])

# Step 3: Split the data into features (X) and target (y)
X = data[['island', 'culmen_length_mm', 'culmen_depth_mm', 'flipper_length_mm', 'body_mass_g', 'sex']]
y = data['species']

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Logistic Regression model with GridSearchCV for hyperparameter tuning
# Define the logistic regression model
log_reg = LogisticRegression(max_iter=1000)

# Define the hyperparameters grid for GridSearchCV
param_grid = {
    'C': [0.01, 0.1, 1, 10],  # Regularization strength
    'solver': ['liblinear', 'saga'],  # Solvers
    'penalty': ['l1', 'l2']  # Regularization type
}

# Perform GridSearchCV to find the best parameters
grid_search = GridSearchCV(estimator=log_reg, param_grid=param_grid, cv=5, n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

# Print the best parameters found by GridSearchCV
print("Best Parameters:", grid_search.best_params_)

# Step 5: Train the Logistic Regression model with the best parameters
best_lr = grid_search.best_estimator_

# Fit the model on the training data
best_lr.fit(X_train, y_train)

# Step 6: Model Evaluation

# Predict on the test set
y_pred = best_lr.predict(X_test)

# Confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Classification report (precision, recall, f1-score)
print("Classification Report:")
print(classification_report(y_test, y_pred))

species               0
island                0
culmen_length_mm      2
culmen_depth_mm       2
flipper_length_mm     2
body_mass_g           2
sex                  10
dtype: int64
Fitting 5 folds for each of 16 candidates, totalling 80 fits
Best Parameters: {'C': 10, 'penalty': 'l2', 'solver': 'liblinear'}
Confusion Matrix:
[[32  0  0]
 [ 0 16  0]
 [ 0  0 21]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        32
           1       1.00      1.00      1.00        16
           2       1.00      1.00      1.00        21

    accuracy                           1.00        69
   macro avg       1.00      1.00      1.00        69
weighted avg       1.00      1.00      1.00        69



# **Save Recommendation Model & Data**

In [22]:
import pickle
with open('penguin_log_reg_model.pkl', 'wb') as file:
    pickle.dump(best_lr, file)

print("Model saved successfully!")

Model saved successfully!


# **Write file for deploy streamlit**

In [23]:
%%writefile app_penguins_rf_model.py

# Import necessary libraries
import streamlit as st
import pickle
import numpy as np
import os

# Set page configuration
st.set_page_config(page_title="Penguin Species Predictor", page_icon="🐧", layout="centered")

# Set background image URL
background_image_url = "https://images.pexels.com/photos/300857/pexels-photo-300857.jpeg"

# Set desired colors
text_color = "#CC0099"  # Text color
result_bg_color = "#FFFAFA"  # Result background color

# Apply CSS for background and text colors
st.markdown(
    f"""
    <style>
    .stApp {{
        background-image: url('{background_image_url}');
        background-size: cover;
        background-position: center;
        height: 100vh;
    }}
    h1, h2, h3, p, div {{
        color: {text_color} !important;
    }}
    .result-container {{
        background-color: {result_bg_color};
        border-radius: 10px;
        padding: 20px;
        box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
        opacity: 0.9;
        border: 2px solid {text_color};
    }}
    .image-container {{
        display: flex;
        justify-content: center;
        align-items: center;
        margin: 20px 0; /* Add margin for spacing */
    }}
    </style>
    """,
    unsafe_allow_html=True
)

# Load the trained Random Forest model
model_path = 'penguins_rf_model.pkl'

# Ensure the model file exists
if os.path.exists(model_path):
    with open(model_path, 'rb') as file:
        rf_model = pickle.load(file)
else:
    st.error(f"Error: {model_path} not found. Please make sure the model file is available.")
    st.stop()

# Streamlit App
st.title("🐧 Penguin Species Prediction")
st.write("This app predicts the **species** of a penguin based on its physical characteristics. "
         "Please enter the details on the left and click 'Predict Species' to see the result.")

# Sidebar inputs for user to input penguin features
st.sidebar.header("Penguin Features")
st.sidebar.write("Provide the following features to predict the penguin's species:")

# Input options in sidebar with descriptions
island = st.sidebar.selectbox("Island", ['Biscoe', 'Dream', 'Torgersen'])
bill_length_mm = st.sidebar.slider("Bill Length (mm)", 32.1, 59.6, 45.0)
bill_depth_mm = st.sidebar.slider("Bill Depth (mm)", 13.1, 21.5, 17.2)
flipper_length_mm = st.sidebar.slider("Flipper Length (mm)", 172.0, 231.0, 200.0)
body_mass_g = st.sidebar.slider("Body Mass (g)", 2700, 6300, 4200)
sex = st.sidebar.selectbox("Sex", ['Male', 'Female'])

# Move the Predict button to the sidebar
if st.sidebar.button("Predict Species"):
    # Map the island and sex inputs to their encoded values
    island_map = {'Biscoe': 0, 'Dream': 1, 'Torgersen': 2}
    sex_map = {'Male': 0, 'Female': 1}

    # Convert inputs to model-compatible format
    input_data = np.array([[island_map[island], bill_length_mm, bill_depth_mm, flipper_length_mm, body_mass_g, sex_map[sex]]])

    # Predict species
    prediction = rf_model.predict(input_data)
    species_dict = {0: "Adelie", 1: "Chinstrap", 2: "Gentoo"}  # Mapping encoded values to species names
    species = species_dict[prediction[0]]

    # Display results with style
    st.subheader("Prediction Result")

    # Using markdown for better styling
    st.markdown(
        f"<div class='result-container'>"
        f"<h3 style='color: {text_color};'>The predicted species of the penguin is: <strong>{species}</strong></h3>"
        "</div>",
        unsafe_allow_html=True
    )

    # Show species image in the center
    species_image_path = f"{species.lower()}.jpg"  # Using the image name directly (ensure the images are in the same folder)
    if os.path.exists(species_image_path):
        st.image(species_image_path, width=300, caption=f"{species} Penguin", use_column_width='auto')
    else:
        st.warning(f"Image for {species} not found. Please ensure the image is in the same folder as this script.")

    # Display characteristics with a result container
    st.markdown(
        f"<div class='result-container'>"
        f"<h4 style='color: {text_color};'>Penguin Species Characteristics</h4>"
        f"<p style='color: {text_color};'>- Island: {island}</p>"
        f"<p style='color: {text_color};'>- Bill Length: {bill_length_mm} mm</p>"
        f"<p style='color: {text_color};'>- Bill Depth: {bill_depth_mm} mm</p>"
        f"<p style='color: {text_color};'>- Flipper Length: {flipper_length_mm} mm</p>"
        f"<p style='color: {text_color};'>- Body Mass: {body_mass_g} g</p>"
        f"<p style='color: {text_color};'>- Sex: {sex}</p>"
        "</div>",
        unsafe_allow_html=True
    )
else:
    st.info("Please enter the penguin features on the left and click 'Predict Species'.")

Writing app_penguins_rf_model.py


In [24]:
%%writefile requirements.txt

scikit-learn
pandas
streamlit
numpy

Overwriting requirements.txt
