In [1]:
import pandas as pd
from sqlalchemy import create_engine

import sys, os

# Helpers
abspath = os.path.abspath
dirname = os.path.dirname
sep = os.sep
file_ = os.getcwd()

# Put the parent of the current working directory on the import path so the
# `utils.*` imports that follow can be resolved. The membership check keeps the
# cell idempotent: re-running it no longer appends a duplicate entry each time.
project_root = dirname(abspath(file_))
if project_root not in sys.path:
    sys.path.append(project_root)

import utils.folder_tb as fo
import utils.mining_data_tb as md
import utils.sql_tb as sq

## Data to upload

In [2]:
# Read the cleaned dataset from disk, then sanity-check its dimensions and dtypes
cleaned_data_folder = fo.path_to_folder(2, f"data{sep}7_cleaned_data")
cleaned_data_path = cleaned_data_folder + "cleaned_data.csv"
data = pd.read_csv(cleaned_data_path)

print(data.shape)
data.info()

(13446, 16)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13446 entries, 0 to 13445
Data columns (total 16 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   SEQN      13446 non-null  int64  
 1   MCQ160H   13446 non-null  int64  
 2   RIAGENDR  13446 non-null  int64  
 3   RIDAGEYR  13446 non-null  int64  
 4   BPXDI1    13446 non-null  float64
 5   BPXSY1    13446 non-null  float64
 6   BMXWT     13446 non-null  float64
 7   BMXWAIST  13446 non-null  float64
 8   LBXTC     13446 non-null  float64
 9   LBXSGL    13446 non-null  float64
 10  MEANCHOL  13446 non-null  float64
 11  MEANTFAT  13446 non-null  float64
 12  MEANSFAT  13446 non-null  float64
 13  MEANSUGR  13446 non-null  float64
 14  MEANFIBE  13446 non-null  float64
 15  MEANTVB6  13446 non-null  float64
dtypes: float64(12), int64(4)
memory usage: 1.6 MB


In [3]:
model_comparisons_path = fo.path_to_folder(2, "models")


def _load_model_comparison(csv_name):
    """Read one model comparison CSV stored under `model_comparisons_path`.

    The column that pandas loads as "Unnamed: 0" is renamed to "Model".
    `rename` returns a new frame, so nothing is mutated in place and the
    cell gives the same result every time it is re-run.

    Parameters
    ----------
    csv_name : str
        File name (with extension) appended to `model_comparisons_path`.

    Returns
    -------
    pandas.DataFrame
        The table with its first column labelled "Model".
    """
    return pd.read_csv(model_comparisons_path + csv_name).rename(columns = {"Unnamed: 0" : "Model"})


# First model table
model_comparison_noscale_nobalance = _load_model_comparison("model_comparison_noscale_nobalance.csv")

# Second model table
model_comparison_scale_balance = _load_model_comparison("model_comparison_scale_balance.csv")

In [4]:
# Show the first model comparison table as this cell's rendered output
model_comparison_noscale_nobalance

Unnamed: 0,Model,Test_score,Train_score,Test_score_drop,Accuracy,Precision,Recall,F1_score,Confusion_matrix
0,"LogisticRegression(max_iter=300, n_jobs=-1, ra...",0.923048,0.916698,0.006928,0.923048,0.627451,0.145455,0.236162,[[2451 19]\n [ 188 32]]
1,"RandomForestClassifier(max_depth=20, n_jobs=-1...",0.919703,0.972109,-0.05391,0.919703,0.642857,0.040909,0.076923,[[2465 5]\n [ 211 9]]
2,"LogisticRegression(n_jobs=-1, random_state=42)",0.918959,0.916419,0.002772,0.918959,0.52,0.118182,0.192593,[[2446 24]\n [ 194 26]]
3,"RandomForestClassifier(max_depth=10, n_jobs=-1...",0.918587,0.936315,-0.018933,0.918587,0.6,0.013636,0.026667,[[2468 2]\n [ 217 3]]
4,"RandomForestClassifier(n_jobs=-1, random_state...",0.918216,0.971923,-0.055259,0.918216,0.5,0.031818,0.059829,[[2463 7]\n [ 213 7]]
5,"RandomForestClassifier(max_features='sqrt', n_...",0.918216,0.971923,-0.055259,0.918216,0.5,0.031818,0.059829,[[2463 7]\n [ 213 7]]
6,"RandomForestClassifier(max_features='log2', n_...",0.918216,0.971923,-0.055259,0.918216,0.5,0.031818,0.059829,[[2463 7]\n [ 213 7]]
7,KNeighborsClassifier(n_jobs=-1),0.914498,0.918929,-0.004822,0.914498,0.421875,0.122727,0.190141,[[2433 37]\n [ 193 27]]
8,"KNeighborsClassifier(n_jobs=-1, weights='dista...",0.914498,0.969319,-0.056556,0.914498,0.421875,0.122727,0.190141,[[2433 37]\n [ 193 27]]


## SQL connection

In [5]:
# Pull the SQL server settings out of the JSON config and bind each one to its own variable
read_json = md.read_json_to_dict("sql_server_settings.json")

IP_DNS, USER, PASSWORD, DB_NAME, PORT = (
    read_json[setting] for setting in ("IP_DNS", "USER", "PASSWORD", "DB_NAME", "PORT")
)

In [11]:
# Connection with the database
# Build the project's MySQL helper from the settings read from the JSON file,
# then open the connection (the helper prints a confirmation message).
sql_db = sq.MySQL(IP_DNS, USER, PASSWORD, DB_NAME, PORT)
sql_db.connect()

Connected to MySQL server [45395203b_ds_april_2021_db]


<pymysql.connections.Connection at 0x13530b090>

In [7]:
# Create the SQLAlchemy engine that pandas' `to_sql` receives as `con` in the upload cell
db_connection_str = sql_db.SQL_ALCHEMY  # presumably a SQLAlchemy database URL — confirm in utils.sql_tb
db_connection = create_engine(db_connection_str)

In [8]:
### One-off upload: disabled by default
# The tables only need to be uploaded once. `to_sql` is called with its default
# `if_exists="fail"`, so it raises when a table with the same name already
# exists, which would break a "Restart & Run All". An explicit flag replaces the
# old "don't execute" warning: set it to True only when a real upload is intended.
UPLOAD_TO_DB = False

# Destination table name -> DataFrame to upload
tables_to_upload = {
    # Dataset
    "jonathan_suarez_caceres": data,
    # Second model table
    "model_comparison_scale_balance": model_comparison_scale_balance,
    # First model table
    "model_comparison_noscale_nobalance": model_comparison_noscale_nobalance,
}

if UPLOAD_TO_DB:
    for table_name, frame in tables_to_upload.items():
        # index = False: the DataFrame index is not written as a column
        frame.to_sql(table_name, con = db_connection, index = False)

In [13]:
# Close connection
# Ends the MySQL session that was opened with `sql_db.connect()`
sql_db.close()

Close connection with MySQL server [45395203b_ds_april_2021_db]


## Just for testing purposes

In [12]:
# Just for testing purposes: read one uploaded table back from the database
#
# This cell opens its own short-lived connection instead of reusing `sql_db`:
# the cell that closes `sql_db` sits above this one, so on a top-to-bottom run
# `sql_db` is already closed by the time this query executes.

sql_query = '''
SELECT * FROM model_comparison_noscale_nobalance
'''

test_db = sq.MySQL(IP_DNS, USER, PASSWORD, DB_NAME, PORT)
test_db.connect()
try:
    results = test_db.execute_get_sql(sql_query)
    # The first item of each cursor.description entry is the column name
    column_names = [column[0] for column in test_db.cursor.description]
finally:
    # Always close the connection, even if the query fails
    test_db.close()

# Last expression of the cell, so the table is rendered as the output
pd.DataFrame(results, columns = column_names)

Executing:
 
SELECT * FROM model_comparison_noscale_nobalance



Unnamed: 0,Model,Test_score,Train_score,Test_score_drop,Accuracy,Precision,Recall,F1_score,Confusion_matrix
0,"LogisticRegression(max_iter=300, n_jobs=-1, ra...",0.923048,0.916698,0.006928,0.923048,0.627451,0.145455,0.236162,[[2451 19]\n [ 188 32]]
1,"RandomForestClassifier(max_depth=20, n_jobs=-1...",0.919703,0.972109,-0.05391,0.919703,0.642857,0.040909,0.076923,[[2465 5]\n [ 211 9]]
2,"LogisticRegression(n_jobs=-1, random_state=42)",0.918959,0.916419,0.002772,0.918959,0.52,0.118182,0.192593,[[2446 24]\n [ 194 26]]
3,"RandomForestClassifier(max_depth=10, n_jobs=-1...",0.918587,0.936315,-0.018933,0.918587,0.6,0.013636,0.026667,[[2468 2]\n [ 217 3]]
4,"RandomForestClassifier(n_jobs=-1, random_state...",0.918216,0.971923,-0.055259,0.918216,0.5,0.031818,0.059829,[[2463 7]\n [ 213 7]]
5,"RandomForestClassifier(max_features='sqrt', n_...",0.918216,0.971923,-0.055259,0.918216,0.5,0.031818,0.059829,[[2463 7]\n [ 213 7]]
6,"RandomForestClassifier(max_features='log2', n_...",0.918216,0.971923,-0.055259,0.918216,0.5,0.031818,0.059829,[[2463 7]\n [ 213 7]]
7,KNeighborsClassifier(n_jobs=-1),0.914498,0.918929,-0.004822,0.914498,0.421875,0.122727,0.190141,[[2433 37]\n [ 193 27]]
8,"KNeighborsClassifier(n_jobs=-1, weights='dista...",0.914498,0.969319,-0.056556,0.914498,0.421875,0.122727,0.190141,[[2433 37]\n [ 193 27]]
