In [20]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import random
import category_encoders as ce
from imblearn.over_sampling import SMOTE
from sklearn.metrics import f1_score
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.utils import to_categorical
from sklearn.metrics import classification_report, accuracy_score

# Training Model

In [21]:
class DataProcessor:
   """Stateless preprocessing steps for the mood-log DataFrame.

   The methods rely on the column names used in this notebook: a
   ' | '-separated activities column, ``weekday`` and ``sub_mood``.
   """

   def __init__(self):
      pass

   def split_activities(self, data, col_activities):
      """Split the ' | '-separated activity string into one column per activity.

      Args:
         data: DataFrame holding the raw activity strings.
         col_activities: name of the column to split.

      Returns:
         A new DataFrame with columns ``col1``, ``col2``, ... (one per
         activity position).
      """
      # Raw string: '\|' inside a plain literal is an invalid escape sequence.
      data_new = data[col_activities].str.split(r' \| ', expand=True)
      data_new.columns = ['col' + str(i) for i in range(1, len(data_new.columns)+1)]
      return data_new

   def make_unique_activities(self, data, clust_dict):
      """Map every activity to its cluster and keep each row's distinct clusters.

      Args:
         data: DataFrame of activity names, one activity per cell.
         clust_dict: ``{cluster_name: [activity, ...]}``; activity names are
            matched after stripping and lower-casing the cell value.

      Returns:
         A new DataFrame with columns ``activity_1``, ``activity_2``, ...
         holding, per row, the distinct mapped values in order of first
         appearance. Unmapped or missing cells become NaN, and NaN counts as
         one of the distinct values. ``data`` itself is left untouched.
      """
      # Invert the cluster dictionary once instead of once per column.
      activity_to_cluster = {item: cluster for cluster, activity_list in clust_dict.items() for item in activity_list}

      def _normalise(value):
         return value.strip().lower() if pd.notna(value) else value

      # Work on a copy so the caller's frame is not silently rewritten.
      mapped = data.copy()
      for col in mapped.columns:
         mapped[col] = mapped[col].apply(_normalise).map(activity_to_cluster)

      # One record per row, keyed by position; building the frame once avoids
      # the quadratic cost of concatenating inside the loop. Missing keys are
      # filled with NaN by the DataFrame constructor.
      records = [dict(enumerate(mapped.iloc[row].unique())) for row in range(len(mapped))]
      data_unique = pd.DataFrame(records, dtype=object)

      data_unique = data_unique.rename(columns={i: 'activity_'+str(i+1) for i in range(data_unique.shape[1])})
      return data_unique

   def combine_to_first_data_and_drop(self, first_data, second_data, drop_feature):
      """Join two frames on their index, drop columns and normalise ``sub_mood``.

      Args:
         first_data: left DataFrame; must end up containing ``sub_mood``.
         second_data: DataFrame joined onto ``first_data`` by index.
         drop_feature: column label(s) to remove from the joined frame.

      Returns:
         The joined DataFrame without ``drop_feature`` and with ``sub_mood``
         stripped and capitalized.
      """
      df_combined = first_data.join(second_data).drop(columns=drop_feature)
      # Strip before capitalizing: capitalizing ' yolo' first would leave the
      # first letter lower-case once the whitespace is removed.
      df_combined['sub_mood'] = df_combined['sub_mood'].str.strip().str.capitalize()
      return df_combined

   def add_new_feature_is_weekend(self, data, new_feature):
      """Add a 0/1 column that is 1 where ``weekday`` is Saturday or Sunday.

      The column is written onto ``data`` itself, which is also returned.
      """
      weekend_mask = data['weekday'].isin(['Saturday', 'Sunday'])
      data[new_feature] = np.where(weekend_mask, 1, 0)
      return data

   def fill_activities(self, data, column):
      """Drop rows where ``column`` is missing, then fill remaining gaps with 0."""
      data = data[data[column].notna()]
      data = data.fillna(value=0)
      return data

In [22]:
class DataModeling:
   """Train/test preparation, oversampling and a small dense network for mood labels.

   Attributes (set in ``__init__``):
      ce: ordinal encoder mapping the five ``mood`` labels to 0..4.
      smote: SMOTE oversampler built with its default arguments.
      model_ann: the Keras model; ``None`` until ``build_ann_model`` runs.
   """

   def __init__(self):
      self.ce = ce.OrdinalEncoder(cols=['mood'], mapping=[{'col': 'mood', 'mapping': {'Awful': 0, 'Bad': 1, 'Normal': 2, 'Good': 3, 'Amazing': 4}}])
      self.smote = SMOTE()
      self.model_ann = None

   def _num_classes(self):
      """Number of mood classes declared in the encoder mapping."""
      return len(self.ce.mapping[0]['mapping'])

   def split_data(self, data, train_prop, test_prop, label, seed=None):
      """Shuffle the rows and split them into train and test parts.

      Args:
         data: DataFrame containing features and the ``label`` column.
         train_prop: fraction of rows assigned to the training part.
         test_prop: kept for interface compatibility; it is not used, the
            test part is always every row left over after the training part.
         label: name of the target column.
         seed: optional seed for a reproducible shuffle. With ``None`` the
            module-level ``random`` state is used, as before.

      Returns:
         ``(X_train, X_test, y_train, y_test)``.
      """
      train_size = int(train_prop * len(data))

      sampler = random if seed is None else random.Random(seed)
      shuffled_index = sampler.sample(range(len(data)), len(data))

      train_ann = data.iloc[shuffled_index[:train_size]]
      test_ann = data.iloc[shuffled_index[train_size:]]

      X_train = train_ann.drop(columns=label)
      y_train = train_ann[label]
      X_test = test_ann.drop(columns=label)
      y_test = test_ann[label]

      return X_train, X_test, y_train, y_test

   def encoding(self, X_train, X_test, y_train, y_test):
      """One-hot encode the features and ordinal-encode the labels.

      The two feature frames are encoded independently, so their columns may
      differ afterwards; ``add_missing_column`` realigns them. The label
      encoder is fitted on ``y_train`` and then applied to ``y_test``.
      """
      X_train = pd.get_dummies(X_train)
      X_test = pd.get_dummies(X_test)

      y_train = self.ce.fit_transform(y_train)
      y_test = self.ce.transform(y_test)

      return X_train, X_test, y_train, y_test

   def add_columns_X(self, X, list_columns, max_activities=8):
      """Ensure ``activity_<n>_<name>`` exists for every slot and name.

      Args:
         X: feature DataFrame; missing columns are added to it in place.
         list_columns: activity cluster names.
         max_activities: number of activity slots (1..max_activities).

      Returns:
         ``X`` with any missing indicator column added and set to 0.
      """
      for num in range(1, max_activities + 1):
         for item in list_columns:
            column_name = 'activity_'+str(num)+'_'+item
            if column_name not in X.columns:
               X[column_name] = 0
      return X

   def add_missing_column(self, X_train, X_test, y_train, y_test):
      """Give both feature frames the same, sorted set of columns.

      Columns present in only one frame are added to the other filled with 0.
      The inputs are not modified; realigned copies are returned. The label
      arguments are passed through unchanged.
      """
      all_columns = sorted(set(X_train.columns) | set(X_test.columns))

      # fill_value only applies to columns created by the reindex.
      X_train = X_train.reindex(columns=all_columns, fill_value=0)
      X_test = X_test.reindex(columns=all_columns, fill_value=0)

      return X_train, X_test, y_train, y_test

   def apply_oversampling(self, X_train, y_train):
      """Resample the training data with SMOTE and return the result."""
      X_train, y_train = self.smote.fit_resample(X_train, y_train)
      return X_train, y_train

   def build_ann_model(self, input_dim):
      """Create and compile the network: two 64-unit ReLU layers, dropout, softmax."""
      self.model_ann = Sequential()
      self.model_ann.add(Dense(64, activation='relu', input_dim=input_dim))
      self.model_ann.add(Dense(64, activation='relu'))
      self.model_ann.add(Dropout(0.5))
      self.model_ann.add(Dense(self._num_classes(), activation='softmax'))
      self.model_ann.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

   def train_model_ann(self, y_train, y_test):
      """One-hot encode the integer labels (despite the name, nothing is trained).

      The width is pinned to the number of classes in the encoder mapping so
      that a split which happens to lack the highest class still matches the
      softmax layer built by ``build_ann_model``.
      """
      num_classes = self._num_classes()
      y_train = to_categorical(y_train, num_classes=num_classes)
      y_test = to_categorical(y_test, num_classes=num_classes)
      return y_train, y_test

   def train_model(self, X_train, y_train, input_dim, epochs=100, batch_size=32, validation_data=None):
      """Build a fresh network and fit it on the given training data."""
      self.build_ann_model(input_dim)
      self.model_ann.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=validation_data)

   def predict(self, X_test):
      """Return the model's per-class output for ``X_test``."""
      y_pred = self.model_ann.predict(X_test)
      return y_pred

   def evaluate(self, X_test, y_test):
      """Score the model against one-hot ``y_test``.

      Returns:
         ``(accuracy, report)`` where ``report`` is the text produced by
         ``classification_report``.
      """
      y_pred = self.predict(X_test)
      y_pred_classes = np.argmax(y_pred, axis=1)
      y_true = np.argmax(y_test, axis=1)
      accuracy = accuracy_score(y_true, y_pred_classes)
      report = classification_report(y_true, y_pred_classes)
      return accuracy, report

   def save_models(self, name_model):
      """Save the trained model to ``name_model`` and return what ``save`` returns."""
      return self.model_ann.save(name_model)

In [23]:
# Raw string: the backslashes in this Windows path are literal separators,
# not escape sequences ('\P', '\B', '\C', '\D' are invalid escapes otherwise).
df = pd.read_csv(r'D:\Pribadi\Bangkit\Capstone\Daylio_Abid.csv')
df.head()

Unnamed: 0,full_date,date,weekday,time,sub_mood,activities,mood
0,16/04/2021,Apr-16,Friday,8:00 pm,yolo,reading | Art | prayer | fasting | walk | med...,Good
1,15/04/2021,Apr-15,Thursday,2:37 am,focused,reading | learning | Art | prayer | fasting ...,Good
2,14/04/2021,Apr-14,Wednesday,2:39 am,confused,reading | learning | prayer | fasting | Qura...,Normal
3,13/04/2021,Apr-13,Tuesday,2:38 am,wondering,reading | learning | Art | prayer | fasting ...,Normal
4,12/04/2021,Apr-12,Monday,9:52 pm,angry,reading | learning | fasting | walk | medita...,Awful


In [24]:
# Count the distinct sub_mood values in the raw data.
df['sub_mood'].nunique()

22

In [25]:
processor = DataProcessor()

# Step 1: one column per activity listed in the 'activities' string.
activity_columns = processor.split_activities(df, 'activities')

# Step 2: collapse individual activities into clusters and keep the distinct
# clusters of every row.
clust_dict = {
    'Reading and Learning': [
        'research', 'reading', 'learning', 'language learning', 'news update', 'coding',
    ],
    'Spiritual': [
        'quran', 'prayer', 'kaballah', 'meditation', 'holotropic', 'fasting',
    ],
    'Social': [
        'friends', 'party', 'family', 'penpal', 'shopping',
    ],
    'Physical and Travel': [
        'exercise', 'travel', 'walk', 'hiking',
    ],
    'Self-pleasure and Entertainment': [
        'gaming', 'reddit', 'watching series', 'audio books', 'streaming', 'dota 2',
        'movies', 'songs', 'podcast', 'youtube', 'shower', 'trimming', 'shave',
        'good meal', 'power nap',
    ],
    'Creative': [
        'writing', 'art', 'poetry', 'designing', 'recording', 'video editing',
        'documentary', 'write dairy',
    ],
    'Home': [
        'cleaning', 'cooking',
    ],
    'Other': [
        'weight log', 'love', 'jobs', 'tutorial', 'new things', 'phd', 'email', 'repair',
    ],
}
activity_clusters = processor.make_unique_activities(activity_columns, clust_dict)

# Step 3: attach the cluster columns to the raw data and drop what the model
# does not use.
list_drop = ['activities', 'full_date', 'date', 'time', 'activity_9']
df_combined = processor.combine_to_first_data_and_drop(df, activity_clusters, list_drop)

# Step 4: weekend indicator.
df_flagged = processor.add_new_feature_is_weekend(df_combined, 'is_weekend')

# Step 5: drop rows without a first activity, fill the remaining gaps with 0.
df_new = processor.fill_activities(df_flagged, 'activity_1')

In [26]:
modeling = DataModeling()

# Split the data into a training part and a test part.
X_train, X_test, y_train, y_test = modeling.split_data(
    df_new, label='mood', train_prop=0.7, test_prop=0.3
)

# Encode the categorical variables.
X_train, X_test, y_train, y_test = modeling.encoding(X_train, X_test, y_train, y_test)

# Make sure every activity-slot / cluster indicator column exists.
list_of_activity = list(clust_dict.keys())
X_train = modeling.add_columns_X(X_train, list_of_activity)
X_test = modeling.add_columns_X(X_test, list_of_activity)

# Add the columns that are missing from either the training or the test data.
X_train, X_test, y_train, y_test = modeling.add_missing_column(X_train, X_test, y_train, y_test)

# Oversample the training data only.
X_train, y_train = modeling.apply_oversampling(X_train, y_train)

# One-hot encode the labels (the method name notwithstanding, this does not train).
y_train, y_test = modeling.train_model_ann(y_train, y_test)

# Build the network and train it on the processed data.
n_features = X_train.shape[1]
modeling.train_model(
    X_train,
    y_train,
    input_dim=n_features,
    epochs=100,
    batch_size=32,
    validation_data=(X_test, y_test),
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [27]:
# Per-class model output for the held-out rows.
y_pred = modeling.predict(X_test)

# Overall accuracy plus the per-class precision / recall / F1 table.
evaluation = modeling.evaluate(X_test, y_test)
accuracy, report = evaluation

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.996268656716418
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.92      0.96        12
           1       1.00      1.00      1.00        14
           2       1.00      1.00      1.00        45
           3       1.00      1.00      1.00       144
           4       0.98      1.00      0.99        53

    accuracy                           1.00       268
   macro avg       1.00      0.98      0.99       268
weighted avg       1.00      1.00      1.00       268



In [28]:
# Persist the trained network next to the notebook.
modeling.save_models(name_model='model_ann.h5')

# Data Input User

In [29]:
def split_activities(data, col_activities):
    """Split the ' | '-separated activity string into one column per activity.

    Args:
        data: DataFrame holding the activity strings.
        col_activities: name of the column to split.

    Returns:
        A new DataFrame with columns ``activity_1``, ``activity_2``, ...
        (one per activity position).
    """
    # Raw string: '\|' inside a plain literal is an invalid escape sequence.
    data_new = data[col_activities].str.split(r' \| ', expand=True)
    data_new.columns = ['activity_'+str(i+1) for i in range(data_new.shape[1])]

    return data_new

def make_unique_activities(data, clust_dict):
    """Map every activity to its cluster and keep each row's distinct clusters.

    Args:
        data: DataFrame of activity names, one activity per cell.
        clust_dict: ``{cluster_name: [activity, ...]}``; activity names are
            matched after stripping and lower-casing the cell value.

    Returns:
        A new DataFrame with columns ``activity_1``, ``activity_2``, ...
        holding, per row, the distinct mapped values in order of first
        appearance. Unmapped or missing cells become NaN, and NaN counts as
        one of the distinct values. ``data`` itself is left untouched.
    """
    # Invert the cluster dictionary once instead of once per column.
    activity_to_cluster = {item: cluster for cluster, activity_list in clust_dict.items() for item in activity_list}

    def _normalise(value):
        return value.strip().lower() if pd.notna(value) else value

    # Work on a copy so the caller's frame is not silently rewritten.
    mapped = data.copy()
    for col in mapped.columns:
        mapped[col] = mapped[col].apply(_normalise).map(activity_to_cluster)

    # One record per row, keyed by position; building the frame once avoids
    # the quadratic cost of concatenating inside the loop. Missing keys are
    # filled with NaN by the DataFrame constructor.
    records = [dict(enumerate(mapped.iloc[row].unique())) for row in range(len(mapped))]
    data_unique = pd.DataFrame(records, dtype=object)

    data_unique = data_unique.rename(columns={i: 'activity_'+str(i+1) for i in range(data_unique.shape[1])})
    return data_unique

def combine_to_first_data_and_drop(first_data, second_data, drop_feature):
    """Join ``second_data`` onto ``first_data`` by index and drop columns.

    Args:
        first_data: left DataFrame.
        second_data: DataFrame joined onto it.
        drop_feature: column label(s) removed from the joined result.

    Returns:
        The joined DataFrame without ``drop_feature``.
    """
    joined = first_data.join(second_data)
    return joined.drop(columns=drop_feature)

def add_new_feature_is_weekend(data, new_feature):
    """Add a 0/1 column that is 1 where ``weekday`` is Saturday or Sunday.

    The column is written onto ``data`` itself, which is also returned.
    """
    weekend_mask = data['weekday'].isin(['Saturday', 'Sunday'])
    data[new_feature] = np.where(weekend_mask, 1, 0)
    return data

def fill_activities(data, column):
    """Drop rows where ``column`` is missing, then replace remaining gaps with 0."""
    rows_with_value = data.dropna(subset=[column])
    return rows_with_value.fillna(0)

def encoding(data):
    """Return ``data`` with its categorical columns one-hot encoded by ``pd.get_dummies``."""
    return pd.get_dummies(data)

def add_columns_activity_data(data, list_columns):
    """Ensure every activity indicator column expected by the model exists.

    For each slot 1..8 the column ``activity_<slot>_<name>`` is created for
    every name in ``list_columns``; for slots 2..8 the "no activity" column
    ``activity_<slot>_0`` is created as well. New columns are filled with 0.
    Columns that already exist are left untouched, so an ``activity_<slot>_0``
    indicator produced by the encoding step is no longer overwritten with 0.

    Args:
        data: encoded feature DataFrame; missing columns are added in place.
        list_columns: activity cluster names.

    Returns:
        ``data`` with the missing columns added.
    """
    for num in range(1, 9):
        prefix = 'activity_' + str(num) + '_'
        for item in list_columns:
            if prefix + item not in data.columns:
                data[prefix + item] = 0
        # Slot 1 never has a "no activity" column; checked once per slot.
        if num > 1 and prefix + '0' not in data.columns:
            data[prefix + '0'] = 0
    return data

def add_columns_submood_data(data, list_columns):
    """Add a zero-filled ``sub_mood_<name>`` column for every name not yet present.

    ``data`` is modified in place and returned.
    """
    expected_columns = ['sub_mood_' + item for item in list_columns]
    for column_name in expected_columns:
        if column_name in data.columns:
            continue
        data[column_name] = 0
    return data

def add_columns_weekday_data(data, list_columns):
    """Add a zero-filled ``weekday_<name>`` column for every name not yet present.

    ``data`` is modified in place and returned.
    """
    missing = [
        'weekday_' + day
        for day in list_columns
        if 'weekday_' + day not in data.columns
    ]
    for column_name in missing:
        data[column_name] = 0
    return data

def preprocess(data):
    """Turn raw user input into the encoded feature frame used for prediction.

    Args:
        data: DataFrame with the columns ``full_date``, ``date``, ``weekday``,
            ``time``, ``sub_mood`` and ``activities``; ``activities`` holds
            cluster names separated by ' | '.

    Returns:
        A one-hot encoded DataFrame that contains at least every activity,
        sub_mood and weekday indicator column plus ``is_weekend``. Column
        order is not guaranteed; callers select the training column order.
    """
    df_new = split_activities(data, 'activities')

    # Activity clusters; only the cluster names (keys) are used here.
    clust_dict = {
        'Reading and Learning': ['research', 'reading', 'learning', 'language learning', 'news update', 'coding'],
        'Spiritual': ['quran', 'prayer', 'kaballah', 'meditation', 'holotropic', 'fasting'],
        'Social': ['friends', 'party', 'family', 'penpal', 'shopping'],
        'Physical and Travel': ['exercise', 'travel', 'walk', 'hiking'],
        'Self-pleasure and Entertainment': ['gaming', 'reddit', 'watching series', 'audio books', 'streaming', 'dota 2', 'movies', 'songs', 'podcast', 'youtube', 'shower', 'trimming', 'shave', 'good meal', 'power nap'],
        'Creative': ['writing', 'art', 'poetry', 'designing', 'recording', 'video editing', 'documentary', 'write dairy'],
        'Home': ['cleaning', 'cooking'],
        'Other': ['weight log', 'love', 'jobs', 'tutorial', 'new things', 'phd', 'email', 'repair']
        }
    # make_unique_activities is intentionally not applied: the app already
    # sends cluster names, so no activity-to-cluster mapping is needed.

    # Combine the data and drop the columns the model does not use.
    list_drop = ['activities', 'full_date', 'date', 'time']
    df_new = combine_to_first_data_and_drop(data, df_new, list_drop)

    # Normalise sub_mood the same way the training data was normalised
    # (stripped, first letter upper-case), so that e.g. ' cool' still lands
    # in the 'sub_mood_Cool' column after encoding.
    if 'sub_mood' in df_new.columns:
        df_new['sub_mood'] = df_new['sub_mood'].map(
            lambda mood: mood.strip().capitalize() if isinstance(mood, str) else mood
        )

    # Add new feature
    df_new = add_new_feature_is_weekend(df_new, 'is_weekend')

    # Fill activities
    df_new = fill_activities(df_new, 'activity_1')
    df_new = encoding(df_new)

    # Add the activity columns that are still missing.
    list_of_activity = list(clust_dict.keys())
    df_new = add_columns_activity_data(df_new, list_of_activity)

    # Add the sub_mood columns that are still missing.
    list_of_submood = ['Yolo','Focused','Confused','Wondering','Angry','Blessed','Excited','Chill','Hungry','Happiest day',
                       'Weak','Meh','Awful','Cool','Worried','Over the moon','Triggered','Sad af','Scared','Good','Bad','Sick']
    df_new = add_columns_submood_data(df_new, list_of_submood)

    # Add the weekday columns that are still missing.
    list_of_day = ['Friday', 'Thursday', 'Wednesday', 'Tuesday', 'Monday', 'Sunday', 'Saturday']
    df_new = add_columns_weekday_data(df_new, list_of_day)

    return df_new

In [30]:
# One sample record as the app would send it; each inner list is one field.
lst_inp = [
    ["16/04/2021"],
    ["Apr-16"],
    ["Sunday"],
    ["8:00 pm"],
    ["Cool"],
    ["Reading and Learning | Self-pleasure and Entertainment | Social"],
]

# Label the fields first, then transpose so they become the columns of a
# single-row frame.
field_names = ['full_date', 'date', 'weekday', 'time', 'sub_mood', 'activities']
df_inp = pd.DataFrame(lst_inp, index=field_names).T

In [31]:
# Feature columns in the exact order the model was trained on:
# activity indicators per slot, is_weekend, sub_mood indicators, weekday indicators.
_activity_clusters = ['Creative', 'Home', 'Other', 'Physical and Travel', 'Reading and Learning',
                      'Self-pleasure and Entertainment', 'Social', 'Spiritual']
_sub_moods = ['Angry', 'Awful', 'Bad', 'Blessed', 'Chill', 'Confused', 'Cool', 'Excited', 'Focused', 'Good',
              'Happiest day', 'Hungry', 'Meh', 'Over the moon', 'Sad af', 'Scared', 'Sick', 'Triggered',
              'Weak', 'Wondering', 'Worried', 'Yolo']
_weekdays = ['Friday', 'Monday', 'Saturday', 'Sunday', 'Thursday', 'Tuesday', 'Wednesday']

col_train = []
for _slot in range(1, 9):
    # Slots 2..8 carry an extra "no activity" indicator ahead of the clusters.
    if _slot > 1:
        col_train.append('activity_' + str(_slot) + '_0')
    col_train.extend('activity_' + str(_slot) + '_' + _cluster for _cluster in _activity_clusters)
col_train.append('is_weekend')
col_train.extend('sub_mood_' + _mood for _mood in _sub_moods)
col_train.extend('weekday_' + _day for _day in _weekdays)

In [32]:
# Encode the sample input, then keep the training columns in training order.
encoded_input = preprocess(df_inp)
result = encoded_input[col_train]
result.shape

(1, 101)

In [33]:
# Raw string: the backslashes in this Windows path are literal separators,
# not escape sequences ('\P', '\B', '\C', '\m' are invalid escapes otherwise).
model_ann = keras.models.load_model(r"D:\Pribadi\Bangkit\Capstone\model_ann.h5")
pred = model_ann.predict(result)
# Index of the highest-scoring class for the single input row.
np.argmax(pred, axis=1)[0]



4

In [36]:
# Columns of the encoded user input, for comparison with the training columns.
result.columns

Index(['activity_1_Creative', 'activity_1_Home', 'activity_1_Other',
       'activity_1_Physical and Travel', 'activity_1_Reading and Learning',
       'activity_1_Self-pleasure and Entertainment', 'activity_1_Social',
       'activity_1_Spiritual', 'activity_2_0', 'activity_2_Creative',
       ...
       'sub_mood_Wondering', 'sub_mood_Worried', 'sub_mood_Yolo',
       'weekday_Friday', 'weekday_Monday', 'weekday_Saturday',
       'weekday_Sunday', 'weekday_Thursday', 'weekday_Tuesday',
       'weekday_Wednesday'],
      dtype='object', length=101)

In [37]:
# Columns of the training features, for comparison with the encoded user input.
X_train.columns

Index(['activity_1_Creative', 'activity_1_Home', 'activity_1_Other',
       'activity_1_Physical and Travel', 'activity_1_Reading and Learning',
       'activity_1_Self-pleasure and Entertainment', 'activity_1_Social',
       'activity_1_Spiritual', 'activity_2_0', 'activity_2_Creative',
       ...
       'sub_mood_Wondering', 'sub_mood_Worried', 'sub_mood_Yolo',
       'weekday_Friday', 'weekday_Monday', 'weekday_Saturday',
       'weekday_Sunday', 'weekday_Thursday', 'weekday_Tuesday',
       'weekday_Wednesday'],
      dtype='object', length=101)

To do:

- Karena inputan untuk activity dari user app nanti sudah sesuai, maka fungsi make_unique_activities sudah tidak diperlukan lagi
- Buat fungsi yang sama pada weekday dan submood seperti fungsi add_columns_activity_data
- Perhatikan pada data train, ada kolom aktivitas yang memiliki subjek 0

DONE

In [34]:
# NOTE(review): `df_` is not defined anywhere in the visible notebook, so this
# cell raises NameError (see the recorded output). It also rebinds `df`, which
# earlier cells use for the raw data.
df = df_

# Cluster names used as the new column labels.
new_columns = ['Reading and Learning',
 'Spiritual',
 'Social',
 'Physical and Travel',
 'Self-pleasure and Entertainment',
 'Creative',
 'Home',
 'Other']

df.columns = new_columns
df

NameError: name 'df_' is not defined

In [None]:
# Replace every value in row i that differs from the i-th column label with
# that label.
# NOTE(review): `df.columns[i]` is indexed by row position, so this presumably
# assumes the frame has no more rows than columns - confirm before reuse.
for i in range(len(df)):
    for j in df.iloc[i].unique():
        if j != df.columns[i]:
            df.iloc[i] = df.iloc[i].replace(j, df.columns[i])
df

Unnamed: 0,Reading and Learning,Spiritual,Social,Physical and Travel,Self-pleasure and Entertainment,Creative,Home,Other
0,Reading and Learning,Reading and Learning,Reading and Learning,Reading and Learning,Reading and Learning,Reading and Learning,Reading and Learning,Reading and Learning
1,Spiritual,Spiritual,Spiritual,Spiritual,Spiritual,Spiritual,Spiritual,Spiritual
2,Social,Social,Social,Social,Social,Social,Social,Social
3,Physical and Travel,Physical and Travel,Physical and Travel,Physical and Travel,Physical and Travel,Physical and Travel,Physical and Travel,Physical and Travel


In [None]:
# Cluster names, in the insertion order of clust_dict.
list(clust_dict)

['Reading and Learning',
 'Spiritual',
 'Social',
 'Physical and Travel',
 'Self-pleasure and Entertainment',
 'Creative',
 'Home',
 'Other']