In [1]:
# Importing required libraries
from urllib.parse import quote

import pandas as pd
import numpy as np

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy import create_engine
import psycopg2
import config
from config import db_password

# Machine model imports 
from path import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [2]:
# Load the input data from the CSV file (a flat file, not the SQL database).
subtypes_df = pd.read_csv('../Resources/random_forest.csv')
subtypes_df

Unnamed: 0,Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,...,gender,engnat,age,hand,religion,orientation,race,voted,married,familysize
0,5,5,3,5,5,5,5,3,4,5,...,1,2,28,1,2,1,5,2,1,1
1,5,5,5,5,5,3,5,5,1,4,...,2,1,14,1,1,2,4,2,1,1
2,2,4,1,2,2,2,4,2,2,4,...,2,2,26,1,1,1,4,1,1,2
3,5,4,1,2,4,5,4,1,4,5,...,1,1,25,1,12,1,4,1,1,3
4,5,4,1,4,4,5,4,3,1,5,...,1,1,37,1,2,2,4,2,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2487,5,2,2,2,4,4,2,2,1,4,...,1,1,32,1,2,1,4,2,1,3
2488,1,1,1,1,1,3,1,1,1,1,...,2,2,25,2,2,1,4,1,1,2
2489,5,5,1,5,5,5,5,1,3,1,...,2,2,34,1,12,2,5,0,2,2
2490,2,1,4,1,1,1,1,3,1,2,...,1,2,19,1,1,1,4,1,1,2


In [3]:
# Add an empty-string placeholder column; it is filled in per row further down.
subtypes_df["SubType"] = ""
subtypes_df.head()

Unnamed: 0,Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,...,engnat,age,hand,religion,orientation,race,voted,married,familysize,SubType
0,5,5,3,5,5,5,5,3,4,5,...,2,28,1,2,1,5,2,1,1,
1,5,5,5,5,5,3,5,5,1,4,...,1,14,1,1,2,4,2,1,1,
2,2,4,1,2,2,2,4,2,2,4,...,2,26,1,1,1,4,1,1,2,
3,5,4,1,2,4,5,4,1,4,5,...,1,25,1,12,1,4,1,1,3,
4,5,4,1,4,4,5,4,3,1,5,...,1,37,1,2,2,4,2,2,2,


In [4]:
def cal_subtype(row, threshold=4.5):
    """Label a respondent with every subscale they score highly on.

    Each subscale score is the mean of three questionnaire items. A subscale
    is included in the label when its mean is strictly greater than
    ``threshold``.

    Args:
        row: Mapping-like object (for example a DataFrame row) that can be
            indexed by the item names "Q1" through "Q15" and yields numeric
            answers.
        threshold: Cut-off a subscale mean must exceed to be included.
            Defaults to 4.5, the value that was previously hard-coded.

    Returns:
        The matching subscale codes joined with "-" in the fixed order
        GM, MG, ET, PW, CI (for example "GM-CI"), or "N/A" when no subscale
        exceeds the threshold.

    Raises:
        KeyError: If ``row`` lacks any of the items "Q1" through "Q15".
    """
    # Subscale code -> the three items averaged for it (items k, k+5, k+10).
    # NOTE(review): the codes presumably name the facets of the questionnaire
    # being analysed -- confirm the expansions against the data dictionary.
    subscales = (
        ("GM", ("Q1", "Q6", "Q11")),
        ("MG", ("Q2", "Q7", "Q12")),
        ("ET", ("Q3", "Q8", "Q13")),
        ("PW", ("Q4", "Q9", "Q14")),
        ("CI", ("Q5", "Q10", "Q15")),
    )
    res = [
        code
        for code, (first, second, third) in subscales
        if (row[first] + row[second] + row[third]) / 3 > threshold
    ]
    # An empty selection is reported explicitly rather than as an empty string.
    return "-".join(res) if res else "N/A"

In [5]:
# Label every respondent with a single column assignment. apply(axis=1) hands
# each row to cal_subtype, which avoids one scalar .loc write per row inside
# an iterrows() loop.
subtypes_df["SubType"] = subtypes_df.apply(cal_subtype, axis=1)

In [6]:
# Target: the derived SubType label. Features: every remaining column once the
# label and the fifteen questionnaire items it is computed from (Q1..Q15) are
# removed.
question_cols = [f"Q{n}" for n in range(1, 16)]
y = subtypes_df["SubType"]
X = subtypes_df.drop(columns=["SubType", *question_cols])

In [7]:
# Hold out a test set using train_test_split's default split proportions;
# random_state is fixed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [8]:
# Create a random forest classifier with 128 trees and a fixed seed.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78)

In [9]:
# Fit the model on the training split; the value returned by fit() is bound
# back to rf_model.
rf_model = rf_model.fit(X_train, y_train)

In [10]:
# Predict a SubType label for each row of the held-out test features.
predictions = rf_model.predict(X_test)

In [11]:
# Accuracy: the fraction of test rows whose predicted label matches the true
# label exactly.
# NOTE(review): if one label (e.g. "N/A") dominates, accuracy alone can look
# better than the model is -- consider a per-class report; confirm class balance.
acc_score = accuracy_score(y_test, predictions)
acc_score

0.6051364365971108

In [12]:
# Connect to the local PostgreSQL database and save the labelled frame.
# The password is percent-encoded first: characters such as "@", ":", "/" or
# "%" in a raw password would otherwise be parsed as part of the URL structure.
db_string = (
    f"postgresql://postgres:{quote(db_password, safe='')}"
    "@127.0.0.1:5432/conspiracy_theories_final"
)
engine = create_engine(db_string)
# if_exists='replace' drops and recreates the table on every run. The frame's
# index is stored in a column explicitly named "index", which is the name the
# read-back cell passes as index_col.
subtypes_df.to_sql(name='subtypes', con=engine, if_exists='replace', index_label='index')

In [13]:
# Read the "subtypes" table back from the database, using its "index" column
# as the DataFrame index. This rebinds subtypes_df to the round-tripped copy.
subtypes_df = pd.read_sql_table("subtypes", con=engine, index_col="index")
subtypes_df.head()

Unnamed: 0_level_0,Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,...,engnat,age,hand,religion,orientation,race,voted,married,familysize,SubType
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,5,5,3,5,5,5,5,3,4,5,...,2,28,1,2,1,5,2,1,1,GM-MG-PW-CI
1,5,5,5,5,5,3,5,5,1,4,...,1,14,1,1,2,4,2,1,1,MG-ET-CI
2,2,4,1,2,2,2,4,2,2,4,...,2,26,1,1,1,4,1,1,2,
3,5,4,1,2,4,5,4,1,4,5,...,1,25,1,12,1,4,1,1,3,GM-CI
4,5,4,1,4,4,5,4,3,1,5,...,1,37,1,2,2,4,2,2,2,GM-CI
