In [None]:
import pandas as pd
import numpy as np
class My_KMeans:
    """K-Means clustering (Lloyd's algorithm) using Euclidean distance.

    Parameters
    ----------
    no_of_clusters : int
        Number of clusters (and therefore centers) to fit.
    no_of_iteration : int, default 1000
        Maximum number of assign/update rounds ``fit_function`` may perform.
        The budget is captured at construction time.
    random_state : int, numpy random generator or None, default None
        Forwarded to ``DataFrame.sample`` when the initial centers are drawn.
        ``None`` keeps the draw unseeded.

    Attributes set by ``fit_function``
    ----------------------------------
    n_features : int
        Number of columns of the training frame.
    store_centers : numpy.ndarray of shape (no_of_clusters, n_features)
        Fitted cluster centers.
    no_of_iteration : int
        Overwritten with the number of rounds actually performed, so code that
        reports it after fitting keeps working. The budget itself is preserved
        separately and is reused unchanged on every refit.
    """

    def __init__(self, no_of_clusters, no_of_iteration=1000, random_state=None):
        self.no_of_clusters = no_of_clusters
        self.no_of_iteration = no_of_iteration
        self.random_state = random_state
        # fit_function overwrites no_of_iteration with the rounds it ran, so the
        # budget is stored separately to stop a refit from being capped by the
        # previous run's count.
        self._max_iteration = no_of_iteration

    def random_cluster_centers(self, data):
        """Pick ``no_of_clusters`` distinct rows of ``data`` as initial centers.

        Parameters
        ----------
        data : pandas.DataFrame
            Training frame; every column must be convertible to float.

        Returns
        -------
        numpy.ndarray
            Float array with one sampled row per cluster.

        Raises
        ------
        ValueError
            Raised by ``DataFrame.sample`` when more rows are requested than
            ``data`` contains.
        """
        store_centers = data.sample(
            n=self.no_of_clusters, random_state=self.random_state
        )
        return store_centers.to_numpy(dtype=float)

    def _assign_labels(self, data, store_centers):
        """Return, for every row of ``data``, the index of its nearest center."""
        points = np.asarray(data, dtype=float)
        centers = np.asarray(store_centers, dtype=float)
        # Broadcasting builds the (n_rows, n_centers) Euclidean distance matrix
        # in one shot instead of looping over rows in Python.
        distances = np.linalg.norm(
            points[:, None, :] - centers[None, :, :], axis=2
        )
        # argmin returns the first minimum, i.e. ties go to the lowest index.
        return np.argmin(distances, axis=1)

    def _create_clusters(self, data, store_centers):
        """Group the rows of ``data`` by nearest center.

        Returns
        -------
        list of list of numpy.ndarray
            ``no_of_clusters`` lists; list ``k`` holds the rows assigned to
            center ``k`` (possibly empty).
        """
        points = np.asarray(data, dtype=float)
        labels = self._assign_labels(points, store_centers)
        return [list(points[labels == k]) for k in range(self.no_of_clusters)]

    def update_cluster_center(self, store_centers, clusters):
        """Recompute each center as the mean of the rows assigned to it.

        A cluster that received no rows keeps its previous center; taking the
        mean of an empty list would produce NaN and poison every later distance.

        Returns
        -------
        numpy.ndarray
            One updated center per (center, cluster) pair.
        """
        new_centers = []
        for center, cluster in zip(store_centers, clusters):
            if len(cluster) == 0:
                new_centers.append(np.asarray(center, dtype=float))
            else:
                new_centers.append(np.mean(cluster, axis=0))
        return np.array(new_centers)

    def calculate_distance(self, center, element):
        """Return the Euclidean distance between ``center`` and ``element``."""
        distance = np.linalg.norm(center - element)
        return distance

    def _closest_centroid(self, store_centers, element):
        """Return the index of the center nearest to a single ``element``.

        No upper bound is assumed on the distances, so points arbitrarily far
        from every center are still assigned to the genuinely nearest one.
        Ties resolve to the lowest index.
        """
        centers = np.asarray(store_centers, dtype=float)
        point = np.asarray(element, dtype=float)
        distances = np.linalg.norm(centers - point, axis=1)
        return int(np.argmin(distances))

    def fit_function(self, data):
        """Fit the cluster centers to ``data``.

        Alternates assignment and center update until the centers stop moving
        or the iteration budget is exhausted.

        Parameters
        ----------
        data : pandas.DataFrame
            Training frame; every column must be convertible to float.

        Side effects
        ------------
        Sets ``n_features`` and ``store_centers`` and overwrites
        ``no_of_iteration`` with the number of rounds performed.
        """
        self.n_features = data.shape[1]  # no of columns == no of features
        store_centers = self.random_cluster_centers(data)
        # Cast once so bool/uint8 dummy columns take part in float arithmetic.
        points = data.to_numpy(dtype=float)

        rounds_done = 0  # stays 0 when the budget is 0
        for rounds_done in range(1, self._max_iteration + 1):
            clusters = self._create_clusters(points, store_centers)
            new_centers = self.update_cluster_center(store_centers, clusters)
            converged = np.allclose(new_centers, store_centers)
            # Carry the update forward; without this the centers never move.
            store_centers = new_centers
            if converged:
                break

        self.no_of_iteration = rounds_done
        self.store_centers = store_centers

    def prediction_function(self, data):
        """Assign every sample in ``data`` to its nearest fitted center.

        Parameters
        ----------
        data : pandas.DataFrame, pandas.Series, list or numpy.ndarray
            A frame of samples, a single sample as a Series, or array-like
            data that is reshaped to ``(-1, n_features)``.

        Returns
        -------
        numpy.ndarray
            Integer cluster index per sample.

        Raises
        ------
        ValueError
            If the samples do not have ``n_features`` columns.
        """
        if isinstance(data, pd.Series):
            data = data.to_frame().T
        if isinstance(data, pd.DataFrame):
            points = data.to_numpy(dtype=float)
        else:
            points = np.asarray(data, dtype=float).reshape(-1, self.n_features)

        if points.ndim != 2 or points.shape[1] != self.n_features:
            raise ValueError(
                "expected samples with {} features, got array of shape {}".format(
                    self.n_features, points.shape
                )
            )
        return self._assign_labels(points, self.store_centers)


In [None]:

# Mount Google Drive at /content/drive so the dataset stored there can be read
# with ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:

# Location of the form-responses CSV on the mounted Google Drive.
SURVEY_CSV_PATH = "/content/drive/MyDrive/Dataset/Depression Analysis/Depression and Happiness Factor Analysis , Maruf's Copy (Responses) - Form Responses 1.csv"

# Raw responses, one row per submitted form.
df = pd.read_csv(SURVEY_CSV_PATH)


In [None]:

# Columns removed before encoding; they are not used as clustering features.
COLUMNS_TO_DROP = [
    "label",
    "On a scale of 1-100, how would you express this feeling?",
    "Timestamp",
]

# Rebinding `df` (rather than inplace=True) together with errors="ignore" keeps
# this cell idempotent: re-running it after the columns are already gone no
# longer raises KeyError.
df = df.drop(columns=COLUMNS_TO_DROP, errors="ignore")

# One-hot encode the categorical columns; numeric columns pass through unchanged.
df_Encoded = pd.get_dummies(df)



In [None]:
# Display the one-hot encoded feature table produced by pd.get_dummies.
df_Encoded

Unnamed: 0,Age,How much have you succeeded to cope up with the environment of your educational institution?,How long did you sleep last night?(in hours),Which year are you in?_1st year,Which year are you in?_2nd year,Which year are you in?_3rd year,Which year are you in?_4th year,Gender_Male,Your location ?_Department,Your location ?_Hall,Your location ?_Hall-Mess,Your location ?_Home,Your location ?_Playground,Relationship status_Single,Are you happy with your financial state?_No,Are you happy with your financial state?_Yes,Understanding with your family members?_Bad,Understanding with your family members?_Good,Understanding with your family members?_Normal,Are you feeling pressure in your study or work right now?_No,Are you feeling pressure in your study or work right now?_Yes,Are you satisfied with your academic result?_No,Are you satisfied with your academic result?_Yes,Are you happy with your living place?_No,Are you happy with your living place?_Yes,Who supports you when you are not succeeding in your academic life?_Family,Who supports you when you are not succeeding in your academic life?_Friends,Have you used any social media within the last 6 hours?_Yes,Do you have inferiority complex? _Maybe,Do you have inferiority complex? _No,Do you have inferiority complex? _Yes,Are you satisfied with your meal today?_Neutral,Are you satisfied with your meal today?_No,Are you satisfied with your meal today?_Yes,Are you feeling sick/health issues today?_No,Are you feeling sick/health issues today?_Yes,"Have you done any recreational activity (sports, gaming, hobby etc.) today?_No","Have you done any recreational activity (sports, gaming, hobby etc.) today?_Yes"
0,20,2,6.0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,0,1,0,1,0
1,20,2,6.5,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,1,0,1,0,0,1,1,1,0,0,0,1,0,1,0,0,1
2,23,3,3.0,0,0,0,1,1,0,0,0,0,1,1,1,0,0,1,0,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0
3,23,3,7.0,0,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,1,0,0,0,1,1,0,1,0
4,23,3,6.0,0,0,0,1,1,1,0,0,0,0,1,1,0,0,0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1202,20,3,7.0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,1,0,1,1,0
1203,20,5,7.0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,0,1,1,0,0,1,0,1,1,0,1,1,0,0,0,0,1,0,1,0,1
1204,20,3,7.0,1,0,0,0,1,0,0,0,1,0,1,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,1,0,1,0,0,1,0,1
1205,20,1,4.0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,1,0,0,0,1,0,1


In [None]:

# Fit the hand-written K-Means on the encoded features with 5 clusters.
# The initial centers come from an unseeded DataFrame.sample, so the fitted
# centers (and any score computed from them) can differ between runs.
my_kmeans = My_KMeans(no_of_clusters=5)
my_kmeans.fit_function(df_Encoded)


In [None]:
# Build both report lines first, then emit them in order.
iteration_report = "Number of iteration = {}".format(my_kmeans.no_of_iteration)
centers_report = "Centers :{}".format(my_kmeans.store_centers)

print(iteration_report)
print(centers_report)

Number of iteration = 1000
Centers :[[21.  3.  6.  0.  1.  0.  0.  1.  0.  0.  0.  1.  0.  1.  1.  0.  0.  1.
   0.  0.  1.  1.  0.  1.  0.  1.  0.  1.  0.  1.  0.  1.  0.  0.  0.  1.
   1.  0.]
 [21.  2.  3.  0.  1.  0.  0.  1.  0.  0.  0.  1.  0.  1.  0.  1.  1.  0.
   0.  0.  1.  0.  1.  0.  1.  1.  0.  1.  1.  0.  0.  1.  0.  0.  1.  0.
   1.  0.]
 [20.  1.  7.  1.  0.  0.  0.  1.  0.  0.  0.  0.  1.  1.  1.  0.  1.  0.
   0.  0.  1.  1.  0.  0.  1.  1.  0.  1.  1.  0.  0.  0.  1.  0.  1.  0.
   0.  1.]
 [22.  3.  3.  0.  0.  1.  0.  1.  0.  0.  1.  0.  0.  1.  1.  0.  1.  0.
   0.  0.  1.  0.  1.  0.  1.  0.  1.  1.  1.  0.  0.  0.  1.  0.  1.  0.
   0.  1.]
 [22.  5.  7.  0.  0.  1.  0.  1.  1.  0.  0.  0.  0.  1.  1.  0.  0.  0.
   1.  0.  1.  1.  0.  0.  1.  0.  1.  1.  0.  1.  0.  1.  0.  0.  1.  0.
   0.  1.]]


In [None]:
from sklearn.cluster import KMeans
from sklearn.metrics.cluster import adjusted_rand_score

# Reference clustering from scikit-learn with the same number of clusters.
kmeans = KMeans(n_clusters=5, random_state=0).fit(df_Encoded)

# Label every row with each model, then compare the two partitions with the
# adjusted Rand index.
sklearn_labels = kmeans.predict(df_Encoded)
my_labels = my_kmeans.prediction_function(df_Encoded)

adjusted_rand_score(sklearn_labels, my_labels)

0.24074696827465453