In [None]:
import pandas as pd
import numpy as np

from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:

from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
class DecisionTree:
    class Question:
        def __init__(self, column,value):
            self.column = column
            self.value = value
            
    class Node:
        def __init__(self,question,trueNode,falseNode,leafNode,prediction):
            self.question = question
            self.leafNode = leafNode
            self.trueNode = trueNode
            self.falseNode = falseNode
            self.prediction = prediction

    def fit(self,x_train,y_train):
        data = x_train
        data["label"] = y_train
        gain, question = self.find_feature(data)
        leafNode=False
        predictions=None
        trueNode=None
        falseNode=None
        if gain==0:
            leafNode = True
            predictions = self.classCount(data)
        else:
            trueBranch,falseBranch = self.branchTree(question,data)
            trueNode = self.train(trueBranch)
            falseNode = self.train(falseBranch)
        self.rootNode = self.Node(question,trueNode,falseNode,leafNode,predictions)
        
    def train(self,data):
        gain, question=self.find_feature(data)
        leafNode=False
        predictions=None
        trueNode=None
        falseNode=None
        if gain==0:
            leafNode = True
            predictions = self.classCount(data)
        else:
            trueBranch,falseBranch=self.branchTree(question,data)
            trueNode = self.train(trueBranch)
            falseNode = self.train(falseBranch)
        return self.Node(question,trueNode,falseNode,leafNode,predictions)
    
    def classCount(self,data):
        probability = data.groupby("label")["label"].count().to_dict()
        for key in probability.keys():
            probability[key] = (probability[key]/len(data))
        return probability
    
    def gini(self,data):
        counts = self.classCount(data)
        impurity = 1
        for lbl in counts:
            prob_of_lbl = counts[lbl] / float(len(data))
            impurity -= prob_of_lbl**2
        return impurity
    
    def info_gain(self,left, right, Impurity):
        p = float(len(left)) / (len(left) + len(right))
        q = float(len(right)) / (len(left) + len(right))
        return Impurity - p * self.gini(left) - q * self.gini(right)
    
    def find_feature(self,data):
        gain = 0
        question = None
        current_uncertainty = self.gini(data)
        for col in data.drop("label",axis=1):
            values = data[col].unique()
            for val in values:
                q = self.Question(col,val)
                truenode,falsenode = self.branchTree(q,data)
                if len(truenode)==0 or len(falsenode)==0:
                    continue
                g = self.info_gain(truenode, falsenode, current_uncertainty)
                if g >= gain:
                    gain, question = g, q
        return gain,question
    
    def branchTree(self,question,data):
        truenode = data[data[question.column]==question.value]
        falsenode = data[data[question.column]!=question.value]
        return truenode,falsenode
        
    def predict(self,data,probability=False):
        if isinstance(data,pd.Series):
            data=data.to_frame().T
        result=[]
        for row in data.iterrows():
            row=row[1]
            node=self.rootNode
            while not node.leafNode:
                if row[node.question.column]==node.question.value:
                    node=node.trueNode
                else:
                    node=node.falseNode
            if probability:
                result.append(node.prediction)
            else:
                result.append(max(node.prediction, key=node.prediction.get))
        return result

In [None]:
df = pd.read_csv("/content/drive/MyDrive/Dataset/Depression Analysis/Depression and Happiness Factor Analysis , Maruf's Copy (Responses) - Form Responses 1.csv")

 # df = pd.read_csv("/content/drive/MyDrive/Dataset/Depression Analysis/My_dataset11.csv");

 df.head()

In [None]:
Class_Status = df["label"]
df.drop(["label","On a scale of 1-100, how would you express this feeling?","Timestamp"],axis=1,inplace=True)

In [None]:
df.head()

Unnamed: 0,Which year are you in?,Gender,Age,Your location ?,Relationship status,Are you happy with your financial state?,How much have you succeeded to cope up with the environment of your educational institution?,Understanding with your family members?,Are you feeling pressure in your study or work right now?,Are you satisfied with your academic result?,Are you happy with your living place?,Who supports you when you are not succeeding in your academic life?,Have you used any social media within the last 6 hours?,Do you have inferiority complex?,Are you satisfied with your meal today?,Are you feeling sick/health issues today?,"Have you done any recreational activity (sports, gaming, hobby etc.) today?",How long did you sleep last night?(in hours)
0,1st year,Male,20,Hall,Single,Yes,2,Normal,Yes,No,No,Friends,Yes,Maybe,No,No,No,6.0
1,1st year,Male,20,Hall,Single,No,2,Normal,Yes,No,No,Friends,Yes,Maybe,No,No,Yes,6.5
2,4th year,Male,23,Playground,Single,No,3,Good,Yes,Yes,No,Friends,Yes,Yes,No,Yes,No,3.0
3,4th year,Male,23,Hall,Single,Yes,3,Normal,No,Yes,Yes,Family,Yes,No,Yes,No,No,7.0
4,4th year,Male,23,Department,Single,No,3,Normal,Yes,Yes,Yes,Family,Yes,No,Neutral,Yes,Yes,6.0


In [None]:
x_train, x_test, y_train, y_test = train_test_split(df, Class_Status, test_size= 0.4, random_state=1)

In [None]:
 
model = DecisionTree()
model.fit(x_train,y_train)
 
y_pred = model.predict(x_train) 
y_pred = model.predict(x_test)
 
 
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
print(metrics.classification_report(y_pred,y_test))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


              precision    recall  f1-score   support

         Bad       0.51      0.48      0.50        95
        Good       0.56      0.51      0.54       107
      Normal       0.92      0.94      0.93        96
    Very bad       0.45      0.46      0.46        89
   Very good       0.51      0.56      0.53        96

    accuracy                           0.59       483
   macro avg       0.59      0.59      0.59       483
weighted avg       0.59      0.59      0.59       483

