In [None]:
import mlflow
from data_ingest import get_data

In [None]:
import pandas as pd


train_df, test_df = get_data("parulpandey/emotion-dataset")


In [3]:
train_df = train_df[train_df["label"].isin([1, 0])]
test_df = test_df[test_df["label"].isin([1, 0])]


In [4]:
train_df['label'].value_counts()

label
1    5362
0    4666
Name: count, dtype: int64

In [None]:
from preprocess import normalized_sentence,remove_short_sentences

In [None]:

train_df["text"] = train_df["text"].apply(normalized_sentence)
test_df["text"] = test_df["text"].apply(normalized_sentence)


In [6]:
train_data = remove_short_sentences(train_df, "text", 4)
test_data = remove_short_sentences(test_df, "text", 4)


In [7]:
X_train = train_df["text"]
X_test = test_df["text"]


y_train = train_df["label"]
y_test = test_df["label"]


In [8]:
from sklearn.feature_extraction.text import  TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV


In [9]:
pipeline = Pipeline([("feature_transformer",
                      
                       TfidfVectorizer(max_features=1000)),
                         ("model", LogisticRegression())])


In [10]:
param_grid = {
    # "model__penalty": ["l1", "l2", ],
    "model__penalty": ["l1",  ],
    "model__solver": ["liblinear",  ],
    # "model__solver": ["liblinear", "saga", ],
    # "model__C": [ 0.1, 1, 10,],
    "model__C": [ 10,],
    # "model__max_iter": [100, 200, ],
}


In [11]:
scoring = {
    "accuracy": make_scorer(accuracy_score),
    "precision": make_scorer(precision_score, average="weighted"),
    "recall": make_scorer(recall_score, average="weighted"),
}


In [12]:
gs = GridSearchCV(estimator=pipeline,param_grid=param_grid,scoring=scoring,refit="accuracy")

In [18]:
X_train

0                                    didnt feel humiliated
1        go feeling hopeless damned hopeful around some...
5            ive feeling little burdened lately wasnt sure
8        petronas year feel petronas performed well mad...
10          feel like make suffering seeing mean something
                               ...                        
15993    day feel like braeden calvin happy successful day
15995        brief time beanbag said anna feel like beaten
15996    turning feel pathetic still waiting table subb...
15997                             feel strong good overall
15999                         know lot feel stupid portray
Name: text, Length: 10028, dtype: object

In [13]:
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.sklearn.autolog(max_tuning_runs=None)
mlflow.set_experiment(experiment_name="final_model_hp")

with mlflow.start_run():
    gs.fit(X_train,y_train)





🏃 View run agreeable-sheep-221 at: http://localhost:5000/#/experiments/704817498391572697/runs/d10e953f4e7949e0981780c1a36c9f27
🧪 View experiment at: http://localhost:5000/#/experiments/704817498391572697
🏃 View run languid-foal-755 at: http://localhost:5000/#/experiments/704817498391572697/runs/85e97dc6950241b3b0e3065433ed34a1
🧪 View experiment at: http://localhost:5000/#/experiments/704817498391572697


In [14]:
gs.best_params_

{'model__C': 10, 'model__penalty': 'l1', 'model__solver': 'liblinear'}

In [15]:
gs.best_estimator_

In [19]:
gs.predict(
    
        {"im happy mood today feel almost delighted havent done anything different today normally wonderful"}
    
)




array([1], dtype=int64)

In [24]:
gs.best_estimator_.predict({"i was feeling a little vain when i did this one"})

array([0], dtype=int64)