In [1]:
# Import the dependencies.
from pathlib import Path
from sqlalchemy import create_engine, text, Column, Integer, String, Date
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session
import pandas as pd
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Needed for decision tree visualization
import pydotplus
from IPython.display import Image

### Load and Preprocess Data

In [2]:
# Load student data
def LoadData():
    #Load PostGres database
    userName = "postgres"
    password = "postgres" #use your postgres password if you changed it
    database = "MLearning" 
    engine = create_engine(f"postgresql+psycopg2://{userName}:{password}@localhost:5432/{database}")
    conn = engine.connect()

    #Pull the data into pandas. We are filtering by the period    
    try:
        # Query All Records in the the Database
        query = text("SELECT * FROM Student_Data")
        student_df = pd.read_sql(query, conn)
        
    except:
        print("No data Available")
        
    return student_df

In [3]:
# Create a base class for declarating class definitions to produce Table objects
Base = declarative_base()

In [4]:
class Student(Base):
    __tablename__ = "Student_Data"
    
    Gender  = Column(String),
    Age = Column(String),
    Education_Level = Column(String),
    Institution_Type = Column(String),
    IT_Student = Column(String),
    Student_Location = Column(String),
    Load_Shedding = Column(String),
    Financial_Condition = Column(String),
    Internet_Type = Column(String),
    Network_Type = Column(String),
    Class_Duration = Column(String),
    Self_Lms = Column(String),
    Device = Column(String),
    Adaptivity_Level = Column(String),
    Student_ID = Column(Integer, primary_key = True)

  class Student(Base):
  class Student(Base):
  class Student(Base):
  class Student(Base):
  class Student(Base):
  class Student(Base):
  class Student(Base):
  class Student(Base):
  class Student(Base):
  class Student(Base):
  class Student(Base):
  class Student(Base):
  class Student(Base):
  class Student(Base):


In [5]:
# Load into dataframe
student_df = LoadData()
student_df = student_df.rename(columns={"adaptivity_level": "adaptability_level"})

In [6]:
# Drop network_type column 
student_df = student_df.drop(['network_type'], axis=1)
student_df.head()

Unnamed: 0,student_id,gender,age,education_level,institution_type,it_student,student_location,load_shedding,financial_condition,internet_type,class_duration,self_lms,device,adaptability_level
0,1,Boy,21-25,University,Non Government,No,Yes,Low,Mid,Wifi,3-6,No,Tab,Moderate
1,2,Girl,21-25,University,Non Government,No,Yes,High,Mid,Mobile Data,1-3,Yes,Mobile,Moderate
2,3,Girl,16-20,College,Government,No,Yes,Low,Mid,Wifi,1-3,No,Mobile,Moderate
3,4,Girl,11-15,School,Non Government,No,Yes,Low,Mid,Mobile Data,1-3,No,Mobile,Moderate
4,5,Girl,16-20,School,Non Government,No,Yes,Low,Poor,Mobile Data,0,No,Mobile,Low


### Separate the features `X` from the target `y`

In [7]:
# Define deatures set
X = student_df.copy()
X.drop("adaptability_level", axis=1, inplace=True)
X.head()

Unnamed: 0,student_id,gender,age,education_level,institution_type,it_student,student_location,load_shedding,financial_condition,internet_type,class_duration,self_lms,device
0,1,Boy,21-25,University,Non Government,No,Yes,Low,Mid,Wifi,3-6,No,Tab
1,2,Girl,21-25,University,Non Government,No,Yes,High,Mid,Mobile Data,1-3,Yes,Mobile
2,3,Girl,16-20,College,Government,No,Yes,Low,Mid,Wifi,1-3,No,Mobile
3,4,Girl,11-15,School,Non Government,No,Yes,Low,Mid,Mobile Data,1-3,No,Mobile
4,5,Girl,16-20,School,Non Government,No,Yes,Low,Poor,Mobile Data,0,No,Mobile


In [8]:
# Define target vector
y = student_df["adaptability_level"].ravel()
y[:5]

array(['Moderate', 'Moderate', 'Moderate', 'Moderate', 'Low'],
      dtype=object)

### Encode the categorical variables from the features data using `get_dummies`

In [9]:
X = pd.get_dummies(X)

In [10]:
X.head()

Unnamed: 0,student_id,gender_Boy,gender_Girl,age_1-5,age_11-15,age_16-20,age_21-25,age_26-30,age_6-10,education_level_College,...,internet_type_Mobile Data,internet_type_Wifi,class_duration_0,class_duration_1-3,class_duration_3-6,self_lms_No,self_lms_Yes,device_Computer,device_Mobile,device_Tab
0,1,1,0,0,0,0,1,0,0,0,...,0,1,0,0,1,1,0,0,0,1
1,2,0,1,0,0,0,1,0,0,0,...,1,0,0,1,0,0,1,0,1,0
2,3,0,1,0,0,1,0,0,0,1,...,0,1,0,1,0,1,0,0,1,0
3,4,0,1,0,1,0,0,0,0,0,...,1,0,0,1,0,1,0,0,1,0
4,5,0,1,0,0,1,0,0,0,0,...,1,0,1,0,0,1,0,0,1,0


In [11]:
y = pd.get_dummies(y)
y.head()

Unnamed: 0,High,Low,Moderate
0,0,0,1
1,0,0,1
2,0,0,1
3,0,0,1
4,0,1,0


### Separate the data into training and testing subsets

In [12]:
# Splitting into Train and Test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=80)

### Scale the data using `StandardScaler`

In [13]:
# Creating StandardScaler instance
scaler = StandardScaler()

# Fit the training data to the standard scaler
X_scaler = scaler.fit(X_train)

# Scaling data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

### Fitting the Random Forest Model

In [16]:
from sklearn.ensemble import RandomForestClassifier
# Create a random forest classifier
rf_model = RandomForestClassifier(n_estimators=500, random_state=78)

In [17]:
# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

### Make predictions using the Random Forest Model

In [18]:
# Making predictions using the testing data
predictions = rf_model.predict(X_test_scaled)

### Model Evaluation

In [14]:
# Model Accuracy
# print('Test Acc: %.3f' % model.score(X_test, y_test))

In [15]:
# Calculate the classification report
# predictions = model.predict(X_test)
# print(classification_report(y_test, predictions,
                           # target_names=target_names))