In [3]:
import pandas as pd      
import numpy as np 
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import pickle

## Visual Python Upgrade
NOTE: 
- Refresh your web browser to start a new version.
- Save VP Note before refreshing the page.

In [2]:
# Visual Python
# !pip install visualpython --upgrade

In [3]:
# '# Visual Python
# !visualpy install'

In [4]:
def eval_metrics(actual, pred):
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    mse = mean_squared_error(actual, pred)
    score = r2_score(actual, pred)
    return print(" r2_score:", score, "\n","mae:", mae, "\n","mse:",mse, "\n","rmse:",rmse)

In [5]:
df = pd.read_csv("df_FinalAuto_scout.csv")

In [6]:
df.head()

Unnamed: 0,make_model,body_type,price,vat,km,Gears,age,Previous_Owners,hp_kW,Inspection_new,Upholstery_type,Gearing_Type,Displacement_cc,Weight_kg,cons_comb
0,Audi A1,Sedans,15770,VAT deductible,56013.0,7.0,3.0,2.0,66.0,1,Cloth,Automatic,1422.0,1220.0,3.8
1,Audi A1,Sedans,14500,Price negotiable,80000.0,7.0,2.0,1.0,141.0,0,Cloth,Automatic,1798.0,1255.0,5.6
2,Audi A1,Sedans,14640,VAT deductible,83450.0,7.0,3.0,1.0,85.0,0,Cloth,Automatic,1598.0,1135.0,3.8
3,Audi A1,Sedans,14500,VAT deductible,73000.0,6.0,3.0,1.0,66.0,0,Cloth,Automatic,1422.0,1195.0,3.8
4,Audi A1,Sedans,16790,VAT deductible,16200.0,7.0,3.0,1.0,66.0,1,Cloth,Automatic,1422.0,1135.0,4.1


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13921 entries, 0 to 13920
Data columns (total 15 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   make_model       13921 non-null  object 
 1   body_type        13921 non-null  object 
 2   price            13921 non-null  int64  
 3   vat              13921 non-null  object 
 4   km               13921 non-null  float64
 5   Gears            13921 non-null  float64
 6   age              13921 non-null  float64
 7   Previous_Owners  13921 non-null  float64
 8   hp_kW            13921 non-null  float64
 9   Inspection_new   13921 non-null  int64  
 10  Upholstery_type  13921 non-null  object 
 11  Gearing_Type     13921 non-null  object 
 12  Displacement_cc  13921 non-null  float64
 13  Weight_kg        13921 non-null  float64
 14  cons_comb        13921 non-null  float64
dtypes: float64(8), int64(2), object(5)
memory usage: 1.6+ MB


In [13]:
df2 = df[["age", 'hp_kW', 'km' ,'Gearing_Type','make_model', "price" ]].copy()

In [14]:
df2.head()

Unnamed: 0,age,hp_kW,km,Gearing_Type,make_model,price
0,3.0,66.0,56013.0,Automatic,Audi A1,15770
1,2.0,141.0,80000.0,Automatic,Audi A1,14500
2,3.0,85.0,83450.0,Automatic,Audi A1,14640
3,3.0,66.0,73000.0,Automatic,Audi A1,14500
4,3.0,66.0,16200.0,Automatic,Audi A1,16790


In [15]:
X= df2.drop("price", axis=1)
y= df2["price"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)



In [32]:
cat = X_train.select_dtypes("object").columns
cat

Index(['Gearing_Type', 'make_model'], dtype='object')

In [33]:
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OrdinalEncoder


ord_enc = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)

column_trans = make_column_transformer(
    (ord_enc, cat), remainder="passthrough", verbose_feature_names_out=False
).set_output(transform="pandas")

In [34]:
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor


operations = [
    ("OrdinalEncoder", column_trans),
    ("DT_model", DecisionTreeRegressor(random_state=101)),
]

pipe_model = Pipeline(steps=operations)

pipe_model.fit(X_train, y_train)

In [35]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [36]:
def train_val(model, X_train, y_train, X_test, y_test):
    y_pred = model.predict(X_test)
    y_train_pred = model.predict(X_train)

    scores = {
        "train": {
            "R2": r2_score(y_train, y_train_pred),
            "mae": mean_absolute_error(y_train, y_train_pred),
            "mse": mean_squared_error(y_train, y_train_pred, squared=True),
            "rmse": mean_squared_error(y_train, y_train_pred, squared=False),
        },
        "test": {
            "R2": r2_score(y_test, y_pred),
            "mae": mean_absolute_error(y_test, y_pred),
            "mse": mean_squared_error(y_test, y_pred, squared=True),
            "rmse": mean_squared_error(y_test, y_pred, squared=False),
        },
    }

    return pd.DataFrame(scores)

In [37]:
train_val(pipe_model, X_train, y_train, X_test, y_test)

Unnamed: 0,train,test
R2,0.9810101,0.8967622
mae,310.9029,1379.333
mse,1013984.0,5496845.0
rmse,1006.968,2344.535


In [38]:
pd.options.display.float_format = "{:.3f}".format
train_val(pipe_model, X_train, y_train, X_test, y_test)

Unnamed: 0,train,test
R2,0.981,0.897
mae,310.903,1379.333
mse,1013983.855,5496845.247
rmse,1006.968,2344.535


In [39]:
from sklearn.model_selection import cross_validate

operations = [
    ("OrdinalEncoder", column_trans),
    ("DT_model", DecisionTreeRegressor(random_state=101)),
]
model = Pipeline(steps=operations)

scores = cross_validate(
    model,
    X_train,
    y_train,
    scoring=[
        "r2",
        "neg_mean_absolute_error",
        "neg_mean_squared_error",
        "neg_root_mean_squared_error",
    ],
    cv=10,
    return_train_score=True,
)

df_scores = pd.DataFrame(scores)
df_scores.mean()[2:]

test_r2                                    0.892
train_r2                                   0.982
test_neg_mean_absolute_error           -1442.827
train_neg_mean_absolute_error           -303.103
test_neg_mean_squared_error         -5725014.421
train_neg_mean_squared_error         -987295.209
test_neg_root_mean_squared_error       -2386.379
train_neg_root_mean_squared_error       -993.560
dtype: float64

In [41]:
y_pred = pipe_model.predict(X_test)
eval_metrics(y_test, y_pred)

 r2_score: 0.8967622393667474 
 mae: 1379.33296144056 
 mse: 5496845.2472041715 
 rmse: 2344.5351878792885


In [42]:
filename = 'my_model'
pickle.dump(pipe_model, open(filename, 'wb'))

**deployment**

In [43]:
final_model = pickle.load(open('my_model', 'rb'))

In [44]:
columns=list(X.columns)
columns

['age', 'hp_kW', 'km', 'Gearing_Type', 'make_model']

In [50]:
my_dict = {
    "age": 2,
    'hp_kW': 1000,
    'km' : 85000,
    'Gearing_Type': "manual",
    'make_model' :"Audi A1",
}

In [51]:
df = pd.DataFrame.from_dict([my_dict])

In [52]:
df

Unnamed: 0,age,hp_kW,km,Gearing_Type,make_model
0,2,1000,85000,manual,Audi A1


In [53]:
prediction = final_model.predict(df)
print(prediction)

[57400.]


In [54]:
print("The estimated value of sales is {}. ".format(int(prediction[0])))

The estimated value of sales is 57400. 


In [55]:
conda env list

# conda environments:
#
mytest                   C:\Users\nurel\.conda\envs\mytest
base                  *  C:\Users\nurel\anaconda3


Note: you may need to restart the kernel to use updated packages.


In [56]:
pip list

Package                           Version
--------------------------------- ---------
absl-py                           1.4.0
aiofiles                          22.1.0
aiohttp                           3.8.4
aiosignal                         1.3.1
aiosqlite                         0.18.0
alembic                           1.11.2
altair                            4.2.2
anaconda-client                   1.11.1
anaconda-navigator                2.4.2
anaconda-project                  0.11.1
anyio                             3.5.0
appdirs                           1.4.4
argon2-cffi                       21.3.0
argon2-cffi-bindings              21.2.0
astroid                           2.15.6
astropy                           5.1
asttokens                         2.2.1
astunparse                        1.6.3
async-timeout                     4.0.2
attrs                             22.1.0
autopep8                          2.0.4
Babel                             2.11.0
backcall                  



In [None]:
#pip install -r requirements.txt# Streamlit Documentation: https://docs.streamlit.io/


In [57]:
pip list

Package                           Version
--------------------------------- ---------
absl-py                           1.4.0
aiofiles                          22.1.0
aiohttp                           3.8.4
aiosignal                         1.3.1
aiosqlite                         0.18.0
alembic                           1.11.2
altair                            4.2.2
anaconda-client                   1.11.1
anaconda-navigator                2.4.2
anaconda-project                  0.11.1
anyio                             3.5.0
appdirs                           1.4.4
argon2-cffi                       21.3.0
argon2-cffi-bindings              21.2.0
astroid                           2.15.6
astropy                           5.1
asttokens                         2.2.1
astunparse                        1.6.3
async-timeout                     4.0.2
attrs                             22.1.0
autopep8                          2.0.4
Babel                             2.11.0
backcall                  



In [None]:
!streamlit run my_app.py

In [None]:
import streamlit as st
import pandas as pd
import numpy as np
from PIL import Image  # to deal with images (PIL: Python imaging library)

In [None]:
# Title/Text
st.title("This is a title")
st.text("This is some test.")

In [None]:
# Markdown
st.markdown("Streamlit is **_really_ cool** :+1:")
st.markdown("# This is a markdown")
st.markdown("## This is a markdown")
st.markdown("### This is a markdown")

In [None]:
# Header/Subheader
st.header('This is a header')
st.subheader('This is a subheader')

In [None]:

# Success/Info/Error
st.success('This is a success message!')
st.info('This is a purely informational message')
st.error("This is an error.")
st.warning("This is a warning message!")
st.exception("NameError('name there is not defined')")


In [None]:
# Help
st.help(range)


In [None]:

# Write
st.write("Hello World! :sunglasses:")


In [None]:
# Add image
#img = Image.open("images.jpeg")
#st.image(img, caption="cattie", width=300)

In [None]:
# Add video

#my_video = open("videoname",'rb')
#st.video(my_video)


In [None]:
# Add youtube video
st.video("https://www.youtube.com/watch?v=uHKfrz65KSU")


In [None]:
cbox= st.checkbox("Hide and Seek")
if cbox :
    st.write("Hide")
else :
    st.write("Seek")


In [None]:
# Add radio button
status = st.radio("Select a color",("blue","orange","yellow"))
st.write("My favorite color is ", status)

In [None]:
# Add button
st.button("Click me")

if st.button("Press me") :
    st.success("Analyze Results are..")

In [None]:

# Add select box
occupation=st.selectbox("Your Occupation", ["Programmer", "DataScientist", "Doctor"])
st.write("Your Occupation is ", occupation)


In [None]:
# Multi_select
multi_select = st.multiselect("Select multiple numbers",[1,2,3,4,5])
st.write(f"You selected {len(multi_select)} number(s)")
st.write("Your selection is/are", multi_select)
for i in range(len(multi_select)):
    st.write(f"Your {i+1}. selection is {multi_select[i]}")


In [None]:

# Slider
option1 = st.slider("Select a number", min_value=5, max_value=70, step=5)
option2 = st.slider("Select a number", min_value=0.2, max_value=30.2, value=5.2, step=0.2)



In [None]:


result=option1*option2
st.write("multiplication of two options is:",result)


In [None]:


# Text_input
name = st.text_input("Enter your name", placeholder="Your name here please")
if st.button("Submit"):
    st.write("Hello {}".format(name.title()))


In [None]:

# Code  # to show as if code
st.code("import pandas as pd")
st.code("import pandas as pd\nimport numpy as np")


In [None]:

# Echo  # it is used "with block" to draw some code on the app, then execute it
with st.echo():
    import pandas as pd
    import numpy as np
    df = pd.DataFrame({"a":[1,2,3], "b":[4,5,6]})
    df


In [None]:

# Date input
import datetime
today=st.date_input("Today is")
date=st.date_input("Enter the date")


In [None]:

# Time input
the_time=st.time_input("The time is", datetime.time(8, 45))
hour=st.time_input(str(pd.Timestamp.now()))
st.write("Hour is", hour)


In [None]:

# Sidebar
st.sidebar.title("Sidebar title")
st.sidebar.header("Sidebar header")


In [None]:

# Sidebar with slider
a=st.sidebar.slider("input",0,5,2,1)
x=st.sidebar.slider("input2")
st.write("# sidebar input result")
st.success(a*x)


In [None]:

# Dataframe
#df=pd.read_csv(".csv")


In [None]:

# To display dataframe there are 3 methods

# Method 1
st.table(df.head())


In [None]:
# Method 2
st.write(df.head())  # dynamic, you can sort


In [None]:
# Method 3
st.dataframe(df.head()) 


In [None]:

# To load machine learning model
import pickle
filename = "my_model"
model=pickle.load(open(filename, "rb"))


In [None]:

# To take feature inputs
age = st.sidebar.number_input("TV:",min_value=0, max_value=20)
hp_kW = st.sidebar.number_input("radio:",min_value=25, max_value=1,176)
'km' = st.sidebar.number_input("newspaper:",min_value=0, max_value=200000)


In [None]:

# Create a dataframe using feature inputs
my_dict = {
    "age": 2,
    'hp_kW': 1500,
    'km' : 85000,
    'Gearing_Type': "manual",
    'make_model' :"Audi A1",
}
df = pd.DataFrame.from_dict([my_dict])
st.table(df)

In [None]:
# Prediction with user inputs
predict = st.button("Predict")
result =LRmodel_pipe.predict(df)
if predict :
    st.success(result[0])