In [10]:
import requests
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from nltk.stem import WordNetLemmatizer
from io import StringIO
import re
import nltk

In [16]:
# Download the csv data
url = 'https://raw.githubusercontent.com/MSTAM-MDC/cai-app/master/appstore_instagram_reviews_anonymized.csv'
response = requests.get(url)
reviews_df = pd.read_csv(StringIO(response.text))
reviews_df

Unnamed: 0,appId,country,date,id,score,text,title,url,userName,userUrl,version
0,389801252,DE,2024-03-20T22:37:03-07:00,11067766035,1,"When I want to share any stories, posts or ree...",List of friends in weird order,https://itunes.apple.com/de/review?id=38980125...,988858b9f96ee718616ad5dc2496ff911ff86bd790caad...,https://itunes.apple.com/de/reviews/id159880897,323.0.0
1,389801252,DE,2024-03-20T22:17:05-07:00,11067726970,4,I have been using the app for a week now. I ca...,Great companion,https://itunes.apple.com/de/review?id=38980125...,06dfad50909f5bc85a0a4198d91da9e5d5ae9954e8bba6...,https://itunes.apple.com/de/reviews/id1333052389,323.0.0
2,389801252,DE,2024-03-20T16:08:40-07:00,11066866484,1,"Instagram, was ist nur mit euch los? Es ist fr...",#Instagram #Fehler #Verbesserung dringend nötig,https://itunes.apple.com/de/review?id=38980125...,4b8d13b92ce712f53651cb538b1c079daa782136547a20...,https://itunes.apple.com/de/reviews/id711608090,323.0.0
3,389801252,DE,2024-03-20T15:54:39-07:00,11066832683,1,Die neue Schriftart ist so unglaublich hässlich.,Hässliche Layout,https://itunes.apple.com/de/review?id=38980125...,52acf5c35b9fa5653f8fb7b9ab7ec305e0ead4f07708bc...,https://itunes.apple.com/de/reviews/id830638504,323.0.0
4,389801252,DE,2024-03-20T14:59:23-07:00,11066699753,1,Null zufrieden darum gelöscht,Instagram absoluter Müll geworden sorry aber e...,https://itunes.apple.com/de/review?id=38980125...,f31334bd164a58cffae342f04040b0f7cb2ad9416ed0b0...,https://itunes.apple.com/de/reviews/id1193916555,323.0.0
...,...,...,...,...,...,...,...,...,...,...,...
495,389801252,US,2024-03-20T10:15:45-07:00,11065969963,1,Not able to use music since update. I can’t ma...,broken update,https://itunes.apple.com/us/review?id=38980125...,fc9515e3c6cf290fe45251386d5070916358c65759e05a...,https://itunes.apple.com/us/reviews/id1497906061,323.0.0
496,389801252,US,2024-03-20T10:14:51-07:00,11065967408,4,My insta keeps glitching out and the notes are...,Notes??,https://itunes.apple.com/us/review?id=38980125...,d314e04ed703c3dafe206dcf4e73bd8ac4ed9ded79be68...,https://itunes.apple.com/us/reviews/id1369652389,323.0.0
497,389801252,US,2024-03-20T10:12:51-07:00,11065961530,1,Ion appreciate how y’all took my notes on both...,Instagram,https://itunes.apple.com/us/review?id=38980125...,d38ef2ab5f2fb0cdf4183c415f8e0bba9176ed031e9329...,https://itunes.apple.com/us/reviews/id1499593735,323.0.0
498,389801252,US,2024-03-20T10:03:08-07:00,11065933178,5,"I love this app, Although my algorithm isn’t t...",Instagram Algorithm,https://itunes.apple.com/us/review?id=38980125...,07cbdff3fc7a99a2ef468ccf4fd10b301f3216dd095de4...,https://itunes.apple.com/us/reviews/id1356468993,323.0.0


In [17]:
# Initialize the lemmatizer
nltk.download('wordnet')
lemmatizer = WordNetLemmatizer()

# Function to clean and lemmatize text
def clean_and_lemmatize(text):
    # Remove non-alphabetic characters and split into words
    words = re.sub(r"[^a-zA-Z]", " ", text).split()
    # Lemmatize each word
    return ' '.join([lemmatizer.lemmatize(word.lower()) for word in words])

# Download the Tweets.csv data
url = 'https://raw.githubusercontent.com/MSTAM-MDC/cai-app/master/appstore_instagram_reviews_anonymized.csv'
response = requests.get(url)
reviews_df = pd.read_csv(StringIO(response.text))

# Preprocess the text data
reviews_df['text'] = reviews_df['text'].apply(clean_and_lemmatize)

# Map the sentiment to binary values (assuming positive sentiment as 1, and negative/neutral as 0)
reviews_df['score'] = reviews_df['score'].apply(lambda x: 1 if x > 3 else 0)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(reviews_df['text'], reviews_df['score'], test_size=0.2, random_state=42)

# Create a pipeline with TF/IDF vectorizer and Logistic Regression
model = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', LogisticRegression())
])

# Train the model
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, zero_division=1))
print("Recall:", recall_score(y_test, y_pred, zero_division=1))
print("F1-Score:", f1_score(y_test, y_pred, zero_division=1))

# Function to preprocess and predict input text
def preprocess_and_predict(input_text):
    # Preprocess the input text
    cleaned_text = clean_and_lemmatize(input_text)

    # Make a prediction
    prediction = model.predict([cleaned_text])

    # Return the predicted class (0 or 1)
    return prediction[0]

# Example usage:
# Replace 'Your input text here' with your text to classify
input_text = "Your input text here"
predicted_class = preprocess_and_predict(input_text)
print(f"The predicted class for the input text is: {predicted_class}")

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\MSTAM\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Accuracy: 0.75
Precision: 1.0
Recall: 0.07407407407407407
F1-Score: 0.13793103448275862
The predicted class for the input text is: 0


In [18]:
reviews_df

Unnamed: 0,appId,country,date,id,score,text,title,url,userName,userUrl,version
0,389801252,DE,2024-03-20T22:37:03-07:00,11067766035,0,when i want to share any story post or reel th...,List of friends in weird order,https://itunes.apple.com/de/review?id=38980125...,988858b9f96ee718616ad5dc2496ff911ff86bd790caad...,https://itunes.apple.com/de/reviews/id159880897,323.0.0
1,389801252,DE,2024-03-20T22:17:05-07:00,11067726970,1,i have been using the app for a week now i can...,Great companion,https://itunes.apple.com/de/review?id=38980125...,06dfad50909f5bc85a0a4198d91da9e5d5ae9954e8bba6...,https://itunes.apple.com/de/reviews/id1333052389,323.0.0
2,389801252,DE,2024-03-20T16:08:40-07:00,11066866484,0,instagram wa ist nur mit euch los e ist frustr...,#Instagram #Fehler #Verbesserung dringend nötig,https://itunes.apple.com/de/review?id=38980125...,4b8d13b92ce712f53651cb538b1c079daa782136547a20...,https://itunes.apple.com/de/reviews/id711608090,323.0.0
3,389801252,DE,2024-03-20T15:54:39-07:00,11066832683,0,die neue schriftart ist so unglaublich h sslich,Hässliche Layout,https://itunes.apple.com/de/review?id=38980125...,52acf5c35b9fa5653f8fb7b9ab7ec305e0ead4f07708bc...,https://itunes.apple.com/de/reviews/id830638504,323.0.0
4,389801252,DE,2024-03-20T14:59:23-07:00,11066699753,0,null zufrieden darum gel scht,Instagram absoluter Müll geworden sorry aber e...,https://itunes.apple.com/de/review?id=38980125...,f31334bd164a58cffae342f04040b0f7cb2ad9416ed0b0...,https://itunes.apple.com/de/reviews/id1193916555,323.0.0
...,...,...,...,...,...,...,...,...,...,...,...
495,389801252,US,2024-03-20T10:15:45-07:00,11065969963,0,not able to use music since update i can t mak...,broken update,https://itunes.apple.com/us/review?id=38980125...,fc9515e3c6cf290fe45251386d5070916358c65759e05a...,https://itunes.apple.com/us/reviews/id1497906061,323.0.0
496,389801252,US,2024-03-20T10:14:51-07:00,11065967408,1,my insta keep glitching out and the note are gone,Notes??,https://itunes.apple.com/us/review?id=38980125...,d314e04ed703c3dafe206dcf4e73bd8ac4ed9ded79be68...,https://itunes.apple.com/us/reviews/id1369652389,323.0.0
497,389801252,US,2024-03-20T10:12:51-07:00,11065961530,0,ion appreciate how y all took my note on both ...,Instagram,https://itunes.apple.com/us/review?id=38980125...,d38ef2ab5f2fb0cdf4183c415f8e0bba9176ed031e9329...,https://itunes.apple.com/us/reviews/id1499593735,323.0.0
498,389801252,US,2024-03-20T10:03:08-07:00,11065933178,1,i love this app although my algorithm isn t th...,Instagram Algorithm,https://itunes.apple.com/us/review?id=38980125...,07cbdff3fc7a99a2ef468ccf4fd10b301f3216dd095de4...,https://itunes.apple.com/us/reviews/id1356468993,323.0.0


In [19]:
import pickle

# Save the model to the disk
filename = 'finalized_model.pkl'
pickle.dump(model, open(filename, 'wb'))