In [1]:
import pandas as pd
import numpy as np
import streamlit as st

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor

In [2]:
# Load the Auto MPG dataset.
# The CSV is expected to live in the working directory (next to this
# notebook / app script). A relative path keeps the code runnable on machines
# other than the original author's, e.g. when the app is deployed from a
# repository; change DATA_PATH if the file is stored elsewhere.
DATA_PATH="AutoMPGReg.csv"
mpgdf=pd.read_csv(DATA_PATH)

In [3]:
# Preview the first rows to sanity-check that the CSV was parsed as expected.
mpgdf.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,modelyear,origin,carname
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


In [4]:
# Inspect column dtypes and non-null counts. A numeric feature reported as
# `object` (as `horsepower` is in the output below) means it holds
# non-numeric entries and must be cleaned before modelling.
mpgdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           398 non-null    float64
 1   cylinders     398 non-null    int64  
 2   displacement  398 non-null    float64
 3   horsepower    398 non-null    object 
 4   weight        398 non-null    int64  
 5   acceleration  398 non-null    float64
 6   modelyear     398 non-null    int64  
 7   origin        398 non-null    int64  
 8   carname       398 non-null    object 
dtypes: float64(3), int64(4), object(2)
memory usage: 28.1+ KB


In [5]:
# Convert horsepower to a numeric dtype; entries that cannot be parsed as
# numbers become NaN (errors="coerce") instead of raising.
mpgdf["horsepower"] = pd.to_numeric(mpgdf["horsepower"], errors="coerce")

In [6]:
# Impute the missing horsepower values with the column median.
horsepower_median = mpgdf["horsepower"].median()
mpgdf["horsepower"] = mpgdf["horsepower"].fillna(horsepower_median)

In [7]:
# Separate the target (mpg) from the predictors. `carname` is a free-text
# label, so it is dropped from the feature matrix together with the target.
X = mpgdf.drop(columns=['carname', 'mpg'])
y = mpgdf['mpg']

In [8]:
# Registry of the available regressors, keyed by the display name shown in the
# app. Insertion order is the order in which the names are offered to the user.
models = dict([
    ('Linear Regression', LinearRegression()),
    ('Decision Tree', DecisionTreeRegressor()),
    ('Random Forest', RandomForestRegressor()),
    ('Gradient Boosting', GradientBoostingRegressor()),
])

In [9]:
# Sidebar drop-down for model selection: lets the user pick which of the
# registered models is used to produce the results.
model_names = list(models)
selected_model = st.sidebar.selectbox("Select a ML model", model_names)

2024-02-06 15:31:02.575 
  command:

    streamlit run C:\Users\hp\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [10]:
# ML model selection parameters.
# Build the estimator for the model chosen in the sidebar and expose that
# model's main hyper-parameter as a sidebar slider.
#
# st.slider's positional order is (label, min_value, max_value, value, step),
# so the step size is passed by keyword here; when no `value` is given the
# slider starts at `min_value`.
#
# random_state is fixed for the tree-based models so that script reruns with
# the same inputs produce the same fitted model (and hence the same
# prediction).
if selected_model=='Linear Regression':
    model=LinearRegression()
elif selected_model=='Decision Tree':
    max_depth=st.sidebar.slider("max_depth",min_value=8,max_value=16,step=2)
    model=DecisionTreeRegressor(max_depth=max_depth,random_state=42)
elif selected_model=='Random Forest':
    # Sidebar widgets are created through st.sidebar.<widget>, not st.<widget>.sidebar.
    n_estimators=st.sidebar.slider("Num of Trees",min_value=100,max_value=500,step=50)
    model=RandomForestRegressor(n_estimators=n_estimators,random_state=42)
elif selected_model=='Gradient Boosting':
    n_estimators=st.sidebar.slider("Num of Trees",min_value=100,max_value=500,step=50)
    model=GradientBoostingRegressor(n_estimators=n_estimators,random_state=42)

In [11]:
# Train the selected model on the full dataset (no train/test split is made).
model.fit(X,y)
# The selectbox starts on its first option, so Linear Regression is the model
# trained until the user picks a different one in the sidebar.

In [12]:
# Define the application page: title, short description and section header.
st.title("Predict Mileage per Gallon")
st.markdown("Model to Predict Mileage of a car")
st.header("Car Features")


# Lay the feature sliders out in four side-by-side columns.
# st.slider's positional order is (label, min_value, max_value, value, step),
# so the step size is passed by keyword; with no explicit `value` each slider
# starts at its minimum.
col1,col2,col3,col4=st.columns(4)
with col1:
    cylinders=st.slider("Cylinders",min_value=2,max_value=8,step=1)
    displacement=st.slider("Displacement",min_value=50,max_value=500,step=10)
with col2:
    horsepower=st.slider('Horse Power',min_value=50,max_value=500,step=10)
    weight=st.slider("Weight",min_value=1500,max_value=6000,step=250)
with col3:
    acceleration=st.slider("Acceleration",min_value=8,max_value=25,step=1)
    modelyear=st.slider("Year",min_value=70,max_value=85,step=1)
with col4:
    origin=st.slider("Origin",min_value=1,max_value=3,step=1)

In [13]:
# R-squared of the fitted model. It is computed on the same rows the model was
# trained on, so it measures in-sample fit rather than accuracy on unseen cars.
rsquare=model.score(X,y)
# Model prediction for the car described by the sliders.
# The model was fitted on a DataFrame, so the input is passed as a one-row
# DataFrame with the same column names, selected in the training column order.
# This avoids scikit-learn's "X does not have valid feature names" warning and
# raises a KeyError if the training columns ever differ from the sliders,
# instead of silently feeding values into the wrong features.
input_features=pd.DataFrame([{
    'cylinders':cylinders,
    'displacement':displacement,
    'horsepower':horsepower,
    'weight':weight,
    'acceleration':acceleration,
    'modelyear':modelyear,
    'origin':origin,
}])[list(X.columns)]
y_pred=model.predict(input_features)



In [14]:
# Render the results section of the app.
# The f-strings (formatted string literals) interpolate the current values
# into the text that is displayed.
st.header("ML Model Results")
result_lines = (
    f"Selected Model: {selected_model}",
    f"Rsquare:{rsquare}",
    f"Predicted:{y_pred}",
)
for result_line in result_lines:
    st.write(result_line)

In [19]:
# Print the installed pandas, numpy and streamlit versions (in that order) so
# they can be recorded in the GitHub requirements file.
for library in (pd, np, st):
    print(library.__version__)

2.1.1
1.24.3
1.31.0
