In [61]:
## Importing Libraries
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, recall_score, precision_score
from sklearn.model_selection import train_test_split

In [62]:
## Loading Data
# Read the training and test CSV files (in that order) from the working directory.
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in ('therapy_train.csv', 'therapy_test.csv')
)

In [63]:
# Print a summary of the training frame: column names, non-null counts, and dtypes.
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8000 entries, 0 to 7999
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   emotional_state  8000 non-null   int64 
 1   duration         8000 non-null   int64 
 2   concern_type     8000 non-null   int64 
 3   urgency          8000 non-null   int64 
 4   support_style    8000 non-null   int64 
 5   tech_openness    8000 non-null   int64 
 6   availability     8000 non-null   int64 
 7   clarity          8000 non-null   int64 
 8   label            8000 non-null   object
dtypes: int64(8), object(1)
memory usage: 562.6+ KB


In [64]:
## Encoding Labels
# Single source of truth for the class encoding so train and test cannot drift apart.
LABEL_MAP = {'Human': 0, 'Hybrid': 1, 'AI': 2}


def encode_labels(labels):
    """Return `labels` encoded as integers according to LABEL_MAP.

    Parameters
    ----------
    labels : pd.Series
        Either raw string labels (keys of LABEL_MAP) or labels that have
        already been encoded (values of LABEL_MAP).

    Returns
    -------
    pd.Series
        Integer-encoded labels with the same index as `labels`.

    Raises
    ------
    ValueError
        If any entry is neither a known label name nor an already-encoded value.
    """
    # Series.map turns every value that is missing from the dict into NaN, so
    # re-running this cell on already-encoded labels would silently wipe the
    # column. Returning early keeps the cell idempotent.
    if labels.isin(list(LABEL_MAP.values())).all():
        return labels

    encoded = labels.map(LABEL_MAP)

    # Fail loudly on unexpected categories instead of passing NaN to the model.
    unknown = labels[encoded.isna()].unique()
    if len(unknown) > 0:
        raise ValueError(f"Unexpected label values: {list(unknown)}")

    return encoded.astype('int64')


df_train['label'] = encode_labels(df_train['label'])
df_test['label'] = encode_labels(df_test['label'])

print(df_train['label'])
print(df_test['label'])

0       0
1       0
2       0
3       1
4       1
       ..
7995    0
7996    0
7997    0
7998    1
7999    1
Name: label, Length: 8000, dtype: int64
0       1
1       1
2       0
3       0
4       0
       ..
1995    0
1996    1
1997    1
1998    0
1999    1
Name: label, Length: 2000, dtype: int64


In [65]:
## Separating Features and Target
# Every column except 'label' is a model input; 'label' is the prediction target.
X = df_train.drop('label', axis='columns')
y = df_train.loc[:, 'label']

In [66]:
## Train / Hold-out Split
# Hold out 20% of the training rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2025,
)

In [67]:
## Model building
# Fit a logistic regression with default settings and predict the held-out split.
model = LogisticRegression().fit(X_train, y_train)
preds = model.predict(X_test)

# Each entry: printed caption, scoring function, extra keyword arguments.
# Recall and precision are averaged across classes, weighted by class support.
scorers = (
    ("Accuracy:", accuracy_score, {}),
    ("Recall:", recall_score, {'average': 'weighted'}),
    ("Precision:", precision_score, {'average': 'weighted'}),
)
for caption, scorer, options in scorers:
    print(caption, scorer(y_test, preds, **options))

Accuracy: 1.0
Recall: 1.0
Precision: 1.0


In [68]:
# Predict on the separate test file, using the same feature columns the model was fitted on.
df_test_dropped = df_test.drop('label', axis='columns')
test_pred = model.predict(df_test_dropped)

# Put predictions next to the true labels and save them for a row-by-row comparison.
submission = (
    df_test[['label']]
    .rename(columns={'label': 'actual'})
    .assign(ml_pred=test_pred)
    [['ml_pred', 'actual']]
)

submission.to_csv('check.csv', index=False)

In [69]:
# Reload the saved predictions and count the rows where the prediction equals the true label.
check = pd.read_csv('check.csv')

check_ml, check_actual = check['ml_pred'], check['actual']

matches = check_ml.eq(check_actual).sum()

print(f"Number of matching rows: {matches}")

Number of matching rows: 2000


In [70]:
import joblib

# Persist the fitted model to disk. joblib files are pickle-based, so only
# load this file back from a trusted location.
joblib.dump(value=model, filename='new_model.pkl')

['new_model.pkl']