### imports

In [4]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

### do data science

In [5]:
# get saved data from file
with open('/home/hiroshin/projects/pycharm-projects/azure-app-service/poke_data.json') as data_file:    
    file = json.load(data_file)


df = pd.DataFrame(file)

display(df)

Unnamed: 0,name,color,shape,base_happiness
0,bulbasaur,green,quadruped,50
1,ivysaur,green,quadruped,50
2,venusaur,green,quadruped,50
3,charmander,red,upright,50
4,charmeleon,red,upright,50
...,...,...,...,...
146,dratini,blue,squiggle,35
147,dragonair,blue,squiggle,35
148,dragonite,brown,upright,35
149,mewtwo,purple,upright,0


In [6]:
# convert categorical variables to int
df['color'] = df['color'].astype('category')
df['shape'] = df['shape'].astype('category')

color_dict = dict(enumerate(df['color'].cat.categories))
shape_dict = dict(enumerate(df['shape'].cat.categories))

print(color_dict)
print(shape_dict)

{0: 'black', 1: 'blue', 2: 'brown', 3: 'gray', 4: 'green', 5: 'pink', 6: 'purple', 7: 'red', 8: 'white', 9: 'yellow'}
{0: 'armor', 1: 'arms', 2: 'ball', 3: 'blob', 4: 'bug-wings', 5: 'fish', 6: 'heads', 7: 'humanoid', 8: 'legs', 9: 'quadruped', 10: 'squiggle', 11: 'tentacles', 12: 'upright', 13: 'wings'}


In [7]:
cat_columns = df.select_dtypes(['category']).columns

df[cat_columns] = df[cat_columns].apply(lambda x: x.cat.codes)
names_df = df.loc[:, ['name']]

names_df.reset_index(inplace=True)
df

Unnamed: 0,name,color,shape,base_happiness
0,bulbasaur,4,9,50
1,ivysaur,4,9,50
2,venusaur,4,9,50
3,charmander,7,12,50
4,charmeleon,7,12,50
...,...,...,...,...
146,dratini,1,10,35
147,dragonair,1,10,35
148,dragonite,2,12,35
149,mewtwo,6,12,0


In [8]:
df_X = df.iloc[:, 1:3]
df_y = df.iloc[:, -1:]

X_train, X_test, y_train, y_test = train_test_split(df_X, df_y, test_size=0.15, random_state=42)

In [9]:
regr = LinearRegression()
regr.fit(X_train, y_train)

y_pred_np = regr.predict(X_test)
y_pred_df = pd.DataFrame(y_pred_np, columns=['base_happiness'])
X_test.reset_index(inplace=True)

In [10]:
result_df = pd.concat([X_test, y_pred_df], axis=1)

named_result_df = result_df.merge(names_df, left_on='index', right_on='index')
named_result_df.sort_values(by='base_happiness', inplace=True)

named_result_df.reset_index(inplace=True)
cols = ['name', 'color', 'shape', 'base_happiness', 'index']

final_df = named_result_df[cols]

final_df['color'] = final_df['color'].apply(lambda x: color_dict[x])
final_df['shape'] = final_df['shape'].apply(lambda x: shape_dict[x])
final_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df['color'] = final_df['color'].apply(lambda x: color_dict[x])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df['shape'] = final_df['shape'].apply(lambda x: shape_dict[x])


Unnamed: 0,name,color,shape,base_happiness,index
0,weedle,brown,armor,54.529791,12
1,vaporeon,blue,quadruped,54.630111,133
2,nidorina,blue,quadruped,54.630111,29
3,dratini,blue,squiggle,54.676373,146
4,omanyte,blue,tentacles,54.722636,137
5,articuno,blue,wings,54.815161,143
6,alakazam,brown,humanoid,54.853628,64
7,dodrio,brown,legs,54.89989,84
8,raticate,brown,quadruped,54.946153,19
9,vulpix,brown,quadruped,54.946153,36


### Exporting our model

In [13]:
from joblib import dump

path = '/home/hiroshin/projects/pycharm-projects/azure-app-service'

dump(regr, '{}/my_model.joblib'.format(path))

In [16]:
# exporting our mapper dictionaries
import json

with open('{}/mappers/color_mapper.json'.format(path), 'w+') as colorfile:
    json.dump(color_dict, colorfile)
with open('{}/mappers/shape_mapper.json'.format(path), 'w+') as shapefile:
    json.dump(shape_dict, shapefile)