In [None]:
import json
import io
import pandas as pd
from textblob import TextBlob
import matplotlib.pyplot as plt
import re
from bs4 import BeautifulSoup
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import certifi
from transformers import BertTokenizer


In [None]:
def readDataFromJsonFile(path='data.json'):
    """Load and return the parsed contents of a UTF-8 encoded JSON file.

    Args:
        path: Location of the JSON file. Defaults to 'data.json' in the current
            working directory, which is what the notebook reads.

    Returns:
        The decoded JSON value, or None when the file content is not valid
        JSON (a message describing the error is printed in that case).

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist). Only
            JSON decoding errors are handled here.
    """
    try:
        with io.open(path, 'r', encoding='utf-8') as json_file:
            return json.load(json_file)
    except json.decoder.JSONDecodeError as e:
        print(f"Error reading JSON file: {e}")
        # Explicit so callers can see that a failed parse yields None.
        return None


In [None]:
rawData = readDataFromJsonFile()

# One record per movie. The genre and "other" lists are flattened into
# comma-separated strings; the review list is kept as a list for now.
details_list = [
    {
        "Movie Name": entry["details"]["movie_Name"],
        "Genres": ", ".join(entry["details"]["genre"]),
        "Other": ", ".join(entry["details"]["other"]),
        "Reviews": entry["reviews"],
    }
    for entry in rawData.values()
]

# explode() gives every review its own row; the movie-level columns (and the
# movie's index label) are repeated for each of its reviews.
df = pd.DataFrame(details_list).explode('Reviews')
print(df)


                  Movie Name                                        Genres  \
0   The Shawshank Redemption       Epic, Period Drama, Prison Drama, Drama   
0   The Shawshank Redemption       Epic, Period Drama, Prison Drama, Drama   
0   The Shawshank Redemption       Epic, Period Drama, Prison Drama, Drama   
0   The Shawshank Redemption       Epic, Period Drama, Prison Drama, Drama   
0   The Shawshank Redemption       Epic, Period Drama, Prison Drama, Drama   
..                       ...                                           ...   
24               City of God  Caper, Coming-of-Age, Gangster, Crime, Drama   
24               City of God  Caper, Coming-of-Age, Gangster, Crime, Drama   
24               City of God  Caper, Coming-of-Age, Gangster, Crime, Drama   
24               City of God  Caper, Coming-of-Age, Gangster, Crime, Drama   
24               City of God  Caper, Coming-of-Age, Gangster, Crime, Drama   

      Other                                            Reviews 

In [44]:
def clean_text(text):
    """Normalise one review into lowercase words separated by single spaces.

    HTML markup is removed with BeautifulSoup, the remaining text is
    lowercased, and every run of non-word characters (punctuation,
    whitespace, ...) is replaced by one space. Leading/trailing runs become a
    single space as well; the result is not stripped.

    Args:
        text: Raw review text (str or bytes). Any other value -- for example
            None, or the NaN that DataFrame.explode produces for an empty
            review list -- is treated as "no text".

    Returns:
        str: The cleaned text, or '' when ``text`` is not str/bytes.
    """
    # BeautifulSoup cannot parse None/NaN; treat such values as empty text.
    if not isinstance(text, (str, bytes)):
        return ''
    soup = BeautifulSoup(text, 'html.parser')
    text = soup.get_text()
    text = re.sub(r'\W+', ' ', text.lower())
    return text

# Store the normalised text next to the raw review so both remain available.
df['Cleaned_Review'] = df['Reviews'].map(clean_text)
print(df.head())


                 Movie Name                                   Genres    Other  \
0  The Shawshank Redemption  Epic, Period Drama, Prison Drama, Drama  1994, A   
0  The Shawshank Redemption  Epic, Period Drama, Prison Drama, Drama  1994, A   
0  The Shawshank Redemption  Epic, Period Drama, Prison Drama, Drama  1994, A   
0  The Shawshank Redemption  Epic, Period Drama, Prison Drama, Drama  1994, A   
0  The Shawshank Redemption  Epic, Period Drama, Prison Drama, Drama  1994, A   

                                             Reviews  \
0  It is no wonder that the film has such a high ...   
0  I'm trying to save you money; this is the last...   
0  This movie is not your ordinary Hollywood flic...   
0  I have never seen such an amazing film since I...   
0  The best movie in history and the best ending ...   

                                      Cleaned_Review  
0  it is no wonder that the film has such a high ...  
0  i m trying to save you money this is the last ...  
0  this mov

In [None]:
def analyze_sentiment(text):
    """Return the TextBlob sentiment polarity computed for ``text``."""
    return TextBlob(text).sentiment.polarity

# Score every cleaned review; the polarity is stored per row in 'Sentiment'.
df['Sentiment'] = df['Cleaned_Review'].map(analyze_sentiment)
print(df.head())


In [46]:
def get_sentiment_label(score):
    """Map a numeric sentiment score to a class label.

    Returns 'positive' for scores above zero, 'negative' for scores below
    zero, and 'neutral' otherwise (including a score that compares neither
    greater nor less than zero, such as NaN).
    """
    if score > 0:
        return 'positive'
    if score < 0:
        return 'negative'
    return 'neutral'

# Turn each polarity score into its positive / negative / neutral label.
df['Sentiment_Label'] = df['Sentiment'].map(get_sentiment_label)


In [None]:
# Visualize sentiment distribution
# The labels are passed through the explicit `x=` keyword: seaborn does not
# reliably treat a lone positional Series as the x variable (newer releases
# interpret the first positional argument as `data`).
sns.countplot(x=df['Sentiment_Label'])
plt.title("Sentiment Distribution")
plt.show()


In [None]:
# Analyze patterns based on genres
# 'Genres' holds one ", "-joined string per row, so it must be split back into
# a list before explode(); exploding the plain strings is a no-op and would
# count whole genre combinations instead of individual genres.
# Counts are per review row, because df has one row per review.
genre_counts = df['Genres'].str.split(', ').explode().value_counts()
genre_counts.plot(kind='bar')
plt.title("Genre Distribution")
plt.show()


In [None]:
# Analyze patterns based on release year
# 'Other' was built with ", ".join(...), so split on the same ", " separator;
# splitting on "," alone leaves a leading space in the second part.
# reindex(columns=[0, 1]) keeps exactly the first two parts, so rows whose
# 'Other' has fewer or more than two entries no longer break the assignment
# (missing parts become NaN, extra parts are ignored).
# Assumes the first entry is the release year and the second the age rating,
# as the original column names imply -- TODO confirm against the data source.
other_parts = df['Other'].str.split(', ', expand=True).reindex(columns=[0, 1])
df['Release_Year'] = other_parts[0]
df['Age_Restriction'] = other_parts[1]
release_year_counts = df['Release_Year'].value_counts().sort_index()
release_year_counts.plot(kind='bar')
plt.title("Release Year Distribution")
plt.show()


In [None]:
# Analyze patterns based on age restriction
# Frequency of each age-restriction value across the review rows, as a bar chart.
age_restriction_counts = df['Age_Restriction'].value_counts()
age_restriction_counts.plot.bar(title="Age Restriction Distribution")
plt.show()


In [None]:
# Sentiment classification using Logistic Regression
# NOTE(review): the target labels are derived from the TextBlob polarity of the
# same cleaned text, so the reported accuracy measures agreement with TextBlob.

# Hold out 10% of the review rows for evaluation (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    df['Cleaned_Review'],
    df['Sentiment_Label'],
    test_size=0.1,
    random_state=100,
)

# Bag-of-words counts: the vocabulary is fitted on the training split only and
# then reused unchanged for the held-out split.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)

# Fit the classifier and report its predictions and accuracy on the held-out rows.
model = LogisticRegression().fit(X_train, y_train)
predictions = model.predict(X_test)
print(predictions)
print(f'Accuracy: {accuracy_score(y_test, predictions)}')


In [1]:
# NOTE(review): neither SMOTE nor SMOTEENN is imported anywhere in the visible
# part of this notebook, and the recorded output of this cell is
# "NameError: name 'SMOTE' is not defined". Both names have to be imported
# before this cell can run -- presumably from the imbalanced-learn package;
# confirm and add the import to the import cell.
# Create an instance of SMOTE with the desired parameters
smote = SMOTE(k_neighbors=2) # Presumably: for each minority-class sample, SMOTE looks at its 2 nearest neighbours when creating synthetic samples -- confirm against the library docs.

# Pass the SMOTE instance to the smote parameter of SMOTEENN
smote_enn = SMOTEENN(smote=smote)

NameError: name 'SMOTE' is not defined

In [None]:
def preprocess_custom_input(text):
    """Clean a user-supplied review the same way the training reviews were cleaned.

    The text is passed through ``clean_text`` (HTML removal, lowercasing,
    non-word runs collapsed to a space) instead of repeating that logic here,
    so training-time and prediction-time preprocessing cannot drift apart.
    The words are then re-joined with single spaces, which also removes any
    leading or trailing space.

    Args:
        text: Raw review text.

    Returns:
        str: The cleaned, single-space-separated text.
    """
    return ' '.join(clean_text(text).split())

def predict_sentiment(custom_text):
    """Predict the sentiment label for one piece of free text.

    The text is cleaned with ``preprocess_custom_input``, converted to a
    feature vector with the notebook-level ``vectorizer``, and classified
    with the notebook-level ``model``.

    Args:
        custom_text: Raw review text to classify.

    Returns:
        The first (and only) element of the model's prediction for the text.
    """
    # transform() expects a collection of documents, hence the one-item list.
    features = vectorizer.transform([preprocess_custom_input(custom_text)])
    return model.predict(features)[0]

# Example usage: classify one hand-written review with the trained model.
custom_review = "Movie is not good, very bad movie, waste of time"
predicted_sentiment = predict_sentiment(custom_text=custom_review)
print(f'The predicted sentiment is: {predicted_sentiment}')
