In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
df = pd.read_csv("Nepali_Treking_dataset.csv")
df.head(2)

Unnamed: 0.1,Unnamed: 0,Trek,Cost,Time,Trip Grade,Max Altitude,Accomodation,Best Travel Time,Date of Travel,Sex,...,Regional code,Country,Fitness Level,Weather Conditions,Trekking Group Size,Guide/No Guide,Equipment Used,Purpose of Travel,Health Incidents,Review/Satisfaction
0,0,Everest Base Camp Trek,"\n$1,420 USD",16 Days,Moderate,5545 m,Hotel/Guesthouse,March - May & Sept - Dec,9/13/2022,Non-Binary,...,0,total,Intermediate,Clear,10.0,Guide,Poles,Leisure,,4.5
1,1,Everest Base Camp Short Trek,"\n$1,295 USD",14 Days,Moderate,5545 m,Hotel/Guesthouse,March - May & Sept - Dec,9/4/2021,Female,...,40,Afghanistan,,,,,,,,


In [4]:
df.drop_duplicates(inplace=True)

In [5]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_column', None)

In [6]:
df.head(2)

Unnamed: 0.1,Unnamed: 0,Trek,Cost,Time,Trip Grade,Max Altitude,Accomodation,Best Travel Time,Date of Travel,Sex,Age,Employment Type,GraduateOrNot,AnnualIncome,FrequentFlyer,Year,Regional code,Country,Fitness Level,Weather Conditions,Trekking Group Size,Guide/No Guide,Equipment Used,Purpose of Travel,Health Incidents,Review/Satisfaction
0,0,Everest Base Camp Trek,"\n$1,420 USD",16 Days,Moderate,5545 m,Hotel/Guesthouse,March - May & Sept - Dec,9/13/2022,Non-Binary,31,Government Sector,Yes,400000,No,2017,0,total,Intermediate,Clear,10.0,Guide,Poles,Leisure,,4.5
1,1,Everest Base Camp Short Trek,"\n$1,295 USD",14 Days,Moderate,5545 m,Hotel/Guesthouse,March - May & Sept - Dec,9/4/2021,Female,31,Private Sector/Self Employed,Yes,1250000,No,2017,40,Afghanistan,,,,,,,,


In [7]:
df = df.drop(columns={'Unnamed: 0', 'Cost' ,'Accomodation', 'Best Travel Time', 'Date of Travel', 'Employment Type', 'GraduateOrNot', 'AnnualIncome', 'Year', 'Regional code', 'Country', 'Review/Satisfaction', 'FrequentFlyer', 'Fitness Level', 'Purpose of Travel', 'Weather Conditions', 'Health Incidents', 'Trekking Group Size', 'Guide/No Guide', 'Equipment Used'})

In [8]:
df

Unnamed: 0,Trek,Time,Trip Grade,Max Altitude,Sex,Age
0,Everest Base Camp Trek,16 Days,Moderate,5545 m,Non-Binary,31
1,Everest Base Camp Short Trek,14 Days,Moderate,5545 m,Female,31
2,Everest Base Camp Heli Shuttle Trek,12 Days,Moderate,5545 m,Female,34
3,Everest Base Camp Heli Trek,11 Days,Moderate,5545 m,Non-Binary,28
4,Everest Base Camp Trek for Seniors,20 Days,Moderate,5545 m,Non-Binary,28
5,Everest Chola Pass Trek,19 Days,Strenuous,5545 m,Non-Binary,25
6,Gokyo Lake Renjo La Pass Trek,16 Days,Moderate,5360 m,Male,31
7,Everest High Passes Trek,22 Days,Strenuous,5545 m,Transgender,31
8,Short Everest Trek,7 Days,Easy To Moderate,3900 m,Female,28
9,Everest Panorama Trek,10 Days,Easy To Moderate,3860 m,Female,33


In [9]:
df.isnull().sum()

Trek            0
Time            0
Trip Grade      0
Max Altitude    0
Sex             0
Age             0
dtype: int64

In [10]:
t = df['Trek'].unique()

for a in t:
    print(a)

Everest Base Camp Trek
Everest Base Camp Short Trek
Everest Base Camp Heli Shuttle Trek
Everest Base Camp Heli Trek
Everest Base Camp Trek for Seniors
Everest Chola Pass Trek
Gokyo Lake Renjo La Pass Trek
Everest High Passes Trek
Short Everest Trek
Everest Panorama Trek
Everest View Trek
Luxury Everest Base Camp Heli Trek
Everest Base Camp Trek with Chola and Renjo La Pass
Annapurna Base Camp Trek
Annapurna Base Camp Short Trek
Instant Annapurna Base Camp Trek
Annapurna Base Camp Heli Trek
Mardi Himal Trek
Mardi Himal Trekking
Classic Annapurna Circuit Trek
Annapurna Circuit Trek
Annapurna Circuit Short Trek
Annapurna Luxury Trek
Annapurna Panorama Trek
Annapurna Tilicho Lake Trek
Short Annapurna Trek
Manaslu Circuit Trek
Manaslu Circuit Trekking
Tsum Valley with Manaslu Trek
Upper Mustang Trek
Upper Mustang Trek with Yara
Upper Mustang Tiji Festival Trek
Langtang Valley Trek
Langtang Gosaikunda Trek
Tamang Heritage Trek
Tamang Heritage Trail
Helambu Trek
Annapurna Base Camp Trek  
Eve

In [11]:
px.bar(df, x='Trek', title="Barplot for Different Trekking Places")

In [12]:
df['Trek'] = df['Trek'].str.replace('[^a-zA-Z\s]', '', regex=True).astype('object')
df['Time'] = df['Time'].str.replace(r'Days|days','', regex=True).astype('int64')
df['Max Altitude'] = df['Max Altitude'].str.replace(r'm|,','', regex=True).astype('int64')

In [13]:
df['Trip Grade'].unique()

array(['Moderate', 'Strenuous', 'Easy To Moderate', 'Easy',
       'Moderate+Demanding', 'Demanding', 'Light+Moderate',
       'Demanding+Challenging', 'Light', 'Moderate-Hard', 'Easy-Moderate'],
      dtype=object)

In [14]:
df['Trip Grade'].value_counts()

Trip Grade
Moderate                 92
Demanding                75
Easy To Moderate         58
Moderate+Demanding       40
Moderate-Hard            30
Easy                     25
Strenuous                23
Demanding+Challenging    20
Easy-Moderate            10
Light+Moderate            5
Light                     5
Name: count, dtype: int64

In [15]:
df.insert(3, "Trip_Grade", pd.cut(df['Max Altitude'],
                           bins=[-np.inf, 3500, 4500, np.inf],
                           labels=['Easy', 'Moderate', 'Difficult']))

In [16]:
df = df.drop(columns={'Trip Grade'})

In [17]:
sex = df['Sex'].unique()

for i in sex:
    print(i)

Non-Binary
Female
Male
Transgender


In [18]:
sex_mapping = {'Non-Binary':'Others', 'Transgender': 'Others'}

df['Sex'] = df['Sex'].replace(sex_mapping)

In [19]:
df['Sex'].value_counts()

Sex
Others    181
Female    103
Male       99
Name: count, dtype: int64

In [20]:
labels = ['Male', 'Female', 'Transgender']
values = [99, 103, 181]

colors = ['#003f5c', '#bc8b4b', '#008080']

fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=0.4, marker=dict(colors=colors))])
fig.update_layout(title_text='Gender Distribution')
fig.show()

In [21]:
df.shape

(383, 6)

In [22]:
df.drop_duplicates(inplace=True)

In [23]:
df.shape

(353, 6)

In [24]:
df

Unnamed: 0,Trek,Time,Trip_Grade,Max Altitude,Sex,Age
0,Everest Base Camp Trek,16,Difficult,5545,Others,31
1,Everest Base Camp Short Trek,14,Difficult,5545,Female,31
2,Everest Base Camp Heli Shuttle Trek,12,Difficult,5545,Female,34
3,Everest Base Camp Heli Trek,11,Difficult,5545,Others,28
4,Everest Base Camp Trek for Seniors,20,Difficult,5545,Others,28
5,Everest Chola Pass Trek,19,Difficult,5545,Others,25
6,Gokyo Lake Renjo La Pass Trek,16,Difficult,5360,Male,31
7,Everest High Passes Trek,22,Difficult,5545,Others,31
8,Short Everest Trek,7,Moderate,3900,Female,28
9,Everest Panorama Trek,10,Moderate,3860,Female,33


## Adding Columns

In [25]:
# df['Carbs (g)'] = None
# df['Protein (g)'] = None
# df['Water (L)'] = None
# df['Iron (mg)'] = None
# df['Antioxidants'] = None

In [26]:
nutrient_values = {
    'Below 3500 ft': {
        'Male': {
            '18-30': {'Carbs': 350, 'Protein': 75, 'Water': 3.7, 'Iron': 8, 'Antioxidants': 5},
            '31-40': {'Carbs': 350, 'Protein': 75, 'Water': 3.7, 'Iron': 8, 'Antioxidants': 5},
            '41-50': {'Carbs': 340, 'Protein': 70, 'Water': 3.5, 'Iron': 8, 'Antioxidants': 5},
        },
        'Female': {
            '18-30': {'Carbs': 300, 'Protein': 60, 'Water': 2.7, 'Iron': 18, 'Antioxidants': 5},
            '31-40': {'Carbs': 290, 'Protein': 55, 'Water': 2.7, 'Iron': 18, 'Antioxidants': 5},
            '41-50': {'Carbs': 280, 'Protein': 50, 'Water': 2.5, 'Iron': 18, 'Antioxidants': 5},
        },
        'Others': {
            '18-30': {'Carbs': 325, 'Protein': 70, 'Water': 3.2, 'Iron': 13, 'Antioxidants': 5},
            '31-40': {'Carbs': 320, 'Protein': 65, 'Water': 3.0, 'Iron': 13, 'Antioxidants': 5},
            '41-50': {'Carbs': 310, 'Protein': 60, 'Water': 3.0, 'Iron': 13, 'Antioxidants': 5},
        },
    },
    '3500-4500 ft': {
        'Male': {
            '18-30': {'Carbs': 360, 'Protein': 80, 'Water': 3.8, 'Iron': 8, 'Antioxidants': 5},
            '31-40': {'Carbs': 360, 'Protein': 80, 'Water': 3.8, 'Iron': 8, 'Antioxidants': 5},
            '41-50': {'Carbs': 350, 'Protein': 75, 'Water': 3.6, 'Iron': 8, 'Antioxidants': 5},
        },
        'Female': {
            '18-30': {'Carbs': 310, 'Protein': 65, 'Water': 2.8, 'Iron': 18, 'Antioxidants': 5},
            '31-40': {'Carbs': 300, 'Protein': 60, 'Water': 2.7, 'Iron': 18, 'Antioxidants': 5},
            '41-50': {'Carbs': 290, 'Protein': 55, 'Water': 2.5, 'Iron': 18, 'Antioxidants': 5},
        },
        'Others': {
            '18-30': {'Carbs': 335, 'Protein': 75, 'Water': 3.5, 'Iron': 13, 'Antioxidants': 5},
            '31-40': {'Carbs': 330, 'Protein': 70, 'Water': 3.3, 'Iron': 13, 'Antioxidants': 5},
            '41-50': {'Carbs': 320, 'Protein': 65, 'Water': 3.2, 'Iron': 13, 'Antioxidants': 5},
        },
    },
    'Above 4500 ft': {
        'Male': {
            '18-30': {'Carbs': 400, 'Protein': 90, 'Water': 4.0, 'Iron': 10, 'Antioxidants': 5},
            '31-40': {'Carbs': 400, 'Protein': 90, 'Water': 4.0, 'Iron': 10, 'Antioxidants': 5},
            '41-50': {'Carbs': 390, 'Protein': 85, 'Water': 3.8, 'Iron': 10, 'Antioxidants': 5},
        },
        'Female': {
            '18-30': {'Carbs': 350, 'Protein': 70, 'Water': 3.5, 'Iron': 18, 'Antioxidants': 5},
            '31-40': {'Carbs': 340, 'Protein': 65, 'Water': 3.3, 'Iron': 18, 'Antioxidants': 5},
            '41-50': {'Carbs': 330, 'Protein': 60, 'Water': 3.0, 'Iron': 18, 'Antioxidants': 5},
        },
        'Others': {
            '18-30': {'Carbs': 375, 'Protein': 80, 'Water': 3.8, 'Iron': 13, 'Antioxidants': 5},
            '31-40': {'Carbs': 370, 'Protein': 75, 'Water': 3.5, 'Iron': 13, 'Antioxidants': 5},
            '41-50': {'Carbs': 360, 'Protein': 70, 'Water': 3.2, 'Iron': 13, 'Antioxidants': 5},
        },
    },
}

In [27]:
def get_nutrient_values(row):
    if row['Max Altitude'] < 3500:
        altitude_category = 'Below 3500 ft'
    elif 3500 <= row['Max Altitude'] <= 4500:
        altitude_category = '3500-4500 ft'
    else:
        altitude_category = 'Above 4500 ft'
    
    age_group = None
    if 18 <= row['Age'] <= 30:
        age_group = '18-30'
    elif 31 <= row['Age'] <= 40:
        age_group = '31-40'
    elif 41 <= row['Age'] <= 50:
        age_group = '41-50'
    
    if age_group:
        sex = row['Sex']
        return pd.Series(nutrient_values[altitude_category][sex][age_group])
    else:
        return pd.Series({'Carbs': None, 'Protein': None, 'Water': None, 'Iron': None, 'Antioxidants': None})

# Applying the function to add nutrient columns
df[['Carbs (g)', 'Protein (g)', 'Water (l)', 'Iron (mg)', 'Antioxidants']] = df.apply(get_nutrient_values, axis=1)

In [28]:
df

Unnamed: 0,Trek,Time,Trip_Grade,Max Altitude,Sex,Age,Carbs (g),Protein (g),Water (l),Iron (mg),Antioxidants
0,Everest Base Camp Trek,16,Difficult,5545,Others,31,370.0,75.0,3.5,13.0,5.0
1,Everest Base Camp Short Trek,14,Difficult,5545,Female,31,340.0,65.0,3.3,18.0,5.0
2,Everest Base Camp Heli Shuttle Trek,12,Difficult,5545,Female,34,340.0,65.0,3.3,18.0,5.0
3,Everest Base Camp Heli Trek,11,Difficult,5545,Others,28,375.0,80.0,3.8,13.0,5.0
4,Everest Base Camp Trek for Seniors,20,Difficult,5545,Others,28,375.0,80.0,3.8,13.0,5.0
5,Everest Chola Pass Trek,19,Difficult,5545,Others,25,375.0,80.0,3.8,13.0,5.0
6,Gokyo Lake Renjo La Pass Trek,16,Difficult,5360,Male,31,400.0,90.0,4.0,10.0,5.0
7,Everest High Passes Trek,22,Difficult,5545,Others,31,370.0,75.0,3.5,13.0,5.0
8,Short Everest Trek,7,Moderate,3900,Female,28,310.0,65.0,2.8,18.0,5.0
9,Everest Panorama Trek,10,Moderate,3860,Female,33,300.0,60.0,2.7,18.0,5.0


## Encoding

In [29]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()

df.insert(1, 'Encoded_Trek', label_encoder.fit_transform(df['Trek']))
df.insert(3, 'Encoded_Trip_Grade', label_encoder.fit_transform(df['Trip_Grade']))
df.insert(6, 'Encoded_Sex', label_encoder.fit_transform(df['Sex']))

In [30]:
df

Unnamed: 0,Trek,Encoded_Trek,Time,Encoded_Trip_Grade,Trip_Grade,Max Altitude,Encoded_Sex,Sex,Age,Carbs (g),Protein (g),Water (l),Iron (mg),Antioxidants
0,Everest Base Camp Trek,22,16,0,Difficult,5545,2,Others,31,370.0,75.0,3.5,13.0,5.0
1,Everest Base Camp Short Trek,20,14,0,Difficult,5545,0,Female,31,340.0,65.0,3.3,18.0,5.0
2,Everest Base Camp Heli Shuttle Trek,17,12,0,Difficult,5545,0,Female,34,340.0,65.0,3.3,18.0,5.0
3,Everest Base Camp Heli Trek,18,11,0,Difficult,5545,2,Others,28,375.0,80.0,3.8,13.0,5.0
4,Everest Base Camp Trek for Seniors,23,20,0,Difficult,5545,2,Others,28,375.0,80.0,3.8,13.0,5.0
5,Everest Chola Pass Trek,29,19,0,Difficult,5545,2,Others,25,375.0,80.0,3.8,13.0,5.0
6,Gokyo Lake Renjo La Pass Trek,38,16,0,Difficult,5360,1,Male,31,400.0,90.0,4.0,10.0,5.0
7,Everest High Passes Trek,30,22,0,Difficult,5545,2,Others,31,370.0,75.0,3.5,13.0,5.0
8,Short Everest Trek,66,7,2,Moderate,3900,0,Female,28,310.0,65.0,2.8,18.0,5.0
9,Everest Panorama Trek,34,10,2,Moderate,3860,0,Female,33,300.0,60.0,2.7,18.0,5.0


In [31]:
df['Antioxidants'] = df['Antioxidants'].astype(int)

In [32]:
df.drop(columns={'Trek', 'Trip_Grade', 'Sex'}, inplace=True)

In [33]:
df = df.sort_values(by=df.columns.tolist(), ascending=True)

In [34]:
df.describe()

Unnamed: 0,Encoded_Trek,Time,Encoded_Trip_Grade,Max Altitude,Encoded_Sex,Age,Carbs (g),Protein (g),Water (l),Iron (mg),Antioxidants
count,353.0,353.0,353.0,353.0,353.0,353.0,353.0,353.0,353.0,353.0,353.0
mean,38.603399,14.66289,0.730878,4694.759207,1.195467,29.66289,355.396601,75.439093,3.535127,13.33711,5.0
std,23.794604,4.531733,0.913027,965.364081,0.835152,2.900276,28.754667,8.595543,0.363163,3.2739,0.0
min,0.0,5.0,0.0,1550.0,0.0,25.0,290.0,55.0,2.7,8.0,5.0
25%,16.0,11.0,0.0,4200.0,0.0,28.0,335.0,70.0,3.3,10.0,5.0
50%,40.0,14.0,0.0,5050.0,1.0,29.0,350.0,75.0,3.5,13.0,5.0
75%,58.0,18.0,2.0,5416.0,2.0,32.0,375.0,80.0,3.8,18.0,5.0
max,78.0,27.0,2.0,6340.0,2.0,35.0,400.0,90.0,4.0,18.0,5.0


In [35]:
df.corr()

Unnamed: 0,Encoded_Trek,Time,Encoded_Trip_Grade,Max Altitude,Encoded_Sex,Age,Carbs (g),Protein (g),Water (l),Iron (mg),Antioxidants
Encoded_Trek,1.0,0.056507,0.141924,-0.237743,-0.017818,-0.034176,-0.157841,-0.105197,-0.167464,0.038919,
Time,0.056507,1.0,-0.350875,0.516838,0.003198,0.039098,0.297092,0.156969,0.193128,0.080636,
Encoded_Trip_Grade,0.141924,-0.350875,1.0,-0.660326,0.058007,-0.03114,-0.657083,-0.348703,-0.456349,-0.082661,
Max Altitude,-0.237743,0.516838,-0.660326,1.0,-0.040612,0.018146,0.618362,0.39093,0.472228,0.095643,
Encoded_Sex,-0.017818,0.003198,0.058007,-0.040612,1.0,-0.027843,0.266486,0.365949,0.342601,-0.517706,
Age,-0.034176,0.039098,-0.03114,0.018146,-0.027843,1.0,7.5e-05,-0.106294,-0.115225,-0.061599,
Carbs (g),-0.157841,0.297092,-0.657083,0.618362,0.266486,7.5e-05,1.0,0.918536,0.927165,-0.607842,
Protein (g),-0.105197,0.156969,-0.348703,0.39093,0.365949,-0.106294,0.918536,1.0,0.927882,-0.784126,
Water (l),-0.167464,0.193128,-0.456349,0.472228,0.342601,-0.115225,0.927165,0.927882,1.0,-0.724421,
Iron (mg),0.038919,0.080636,-0.082661,0.095643,-0.517706,-0.061599,-0.607842,-0.784126,-0.724421,1.0,


In [36]:
X = df[['Encoded_Trek', 'Time', 'Max Altitude', 'Encoded_Trip_Grade', 'Encoded_Sex', 'Age']]
Y = df[['Carbs (g)','Protein (g)','Water (l)','Iron (mg)', 'Antioxidants']]

In [37]:
X

Unnamed: 0,Encoded_Trek,Time,Max Altitude,Encoded_Trip_Grade,Encoded_Sex,Age
16,0,9,4210,2,1,28
90,0,9,4210,2,2,27
164,0,9,4210,2,2,28
238,0,9,4210,2,2,29
312,0,9,4210,2,2,32
310,1,11,4210,2,0,30
14,1,11,4210,2,1,31
162,1,11,4210,2,1,34
236,1,11,4210,2,2,26
88,1,11,4210,2,2,34


In [38]:
Y

Unnamed: 0,Carbs (g),Protein (g),Water (l),Iron (mg),Antioxidants
16,360.0,80.0,3.8,8.0,5
90,335.0,75.0,3.5,13.0,5
164,335.0,75.0,3.5,13.0,5
238,335.0,75.0,3.5,13.0,5
312,330.0,70.0,3.3,13.0,5
310,310.0,65.0,2.8,18.0,5
14,360.0,80.0,3.8,8.0,5
162,360.0,80.0,3.8,8.0,5
236,335.0,75.0,3.5,13.0,5
88,330.0,70.0,3.3,13.0,5


In [39]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [40]:
X_train

Unnamed: 0,Encoded_Trek,Time,Max Altitude,Encoded_Trip_Grade,Encoded_Sex,Age
264,19,15,5545,0,2,29
132,46,17,5000,0,0,28
250,74,20,5220,0,2,34
89,42,9,4230,2,1,28
241,15,18,5416,0,0,35
379,76,17,4200,2,0,27
377,2,16,4130,2,1,27
220,12,17,3870,2,0,35
163,42,9,4230,2,1,32
162,1,11,4210,2,1,34


In [41]:
# Standard Scaler

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)

X_test = scaler.transform(X_test)

In [42]:
X_train

array([[-0.87827055,  0.05164987,  0.90237018, -0.82418553,  0.93891161,
        -0.23339139],
       [ 0.25900019,  0.48643388,  0.34620308, -0.82418553, -1.47910732,
        -0.57440885],
       [ 1.43839207,  1.1386099 ,  0.5707109 , -0.82418553,  0.93891161,
         1.47169594],
       ...,
       [ 0.80657499, -1.03531015, -1.14371247,  1.36845899,  0.93891161,
        -0.91542631],
       [ 1.56475549,  0.70382589, -0.47018901,  1.36845899,  0.93891161,
         0.10762608],
       [-0.83614941, -0.16574213,  0.90237018, -0.82418553, -1.47910732,
         0.44864354]])

## Model Training

In [43]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

lr_model = LinearRegression()

lr_model.fit(X_train, Y_train)

prediction = lr_model.predict(X_test)

mse = mean_squared_error(prediction, Y_test)
r2 = r2_score(Y_test, prediction)

print(f"Mean Squared Error:", mse*100)
print(f"R-squared:", r2*100)

Mean Squared Error: 8331.84463018019
R-squared: 53.353845794673724


In [44]:
from sklearn.ensemble import RandomForestRegressor

model = RandomForestRegressor()

model.fit(X_train, Y_train)

prediction1 = model.predict(X_test)

mse = mean_squared_error(prediction1, Y_test)
r2 = r2_score(Y_test, prediction1)

print(f"Mean Squared Error:", mse*100)
print(f"R-squared:", r2*100)

Mean Squared Error: 36.7588591549297
R-squared: 99.84812189900649


## New Data Prediction


In [45]:
data = X.iloc[10]
data

Encoded_Trek             2
Time                    14
Max Altitude          4210
Encoded_Trip_Grade       2
Encoded_Sex              2
Age                     28
Name: 87, dtype: int64

In [46]:
data_reshaped = data.values.reshape(1, -1)
data_reshaped

array([[   2,   14, 4210,    2,    2,   28]])

In [47]:
scaler.transform(data_reshaped)

array([[-1.5943299 , -0.16574213, -0.4599841 ,  1.36845899,  0.93891161,
        -0.57440885]])

In [48]:
model.predict(scaler.transform(data_reshaped))

array([[335. ,  75. ,   3.5,  13. ,   5. ]])

## Loading into Pickle file

In [49]:
# import pickle

# pickle.dump(scaler, open("scaler.pkl", "wb"))
# pickle.dump(model, open("rfr_model.pkl", "wb"))