### Finding the most used words in the data for Movie and Anime Subreddits

In [16]:
import plotly.io as pio
# Make every later `fig.show()` use this renderer combination by default.
# Plotly joins renderer names with "+", so this requests both the
# "plotly_mimetype" and the "notebook_connected" renderers.
pio.renderers.default = "plotly_mimetype+notebook_connected"

In [17]:
import plotly.graph_objects as go
import pandas as pd

# Ten words and their occurrence counts for the movie subreddit, hard-coded
# in descending order of count (presumably copied from an upstream
# word-frequency job -- TODO confirm the source).
data = dict(
    word=['movie', 'like', 'look', 'watch', 'im', 'film', 'good', 'see', 'love', 'something'],
    count=[72187, 27869, 17926, 17677, 15842, 14465, 13604, 10841, 9366, 8858],
)
df = pd.DataFrame(data)

# Two-column table trace: one column of words, one column of counts.
movie_table = go.Table(
    header={
        'values': ['Most Occured Words', 'Word Count'],
        'fill_color': '#FF4301',
        'font': {'color': 'white', 'size': 12},
    },
    cells={'values': [df[column] for column in ('word', 'count')]},
)
fig = go.Figure(data=[movie_table])

# Fixed-size figure with only a top margin, leaving room for the title.
fig.update_layout(
    title='Word Count for Movie Reddit Submission',
    width=800,
    height=300,
    paper_bgcolor="White",
    margin={'l': 0, 'r': 0, 't': 50, 'b': 0},
)

# Export the table to an HTML file, then render it inline in the notebook.
fig.write_html("../../data/plots/word_count_table_movie.html")
fig.show()


In [18]:
# Ten words and their occurrence counts for the anime subreddit, hard-coded
# in descending order of count; the two lists are aligned by position.
data2 = dict(
    word=['anime', 'like', 'watch', 'look', 'im', 'good', 'one', 'something', 'mc', 'character'],
    count=[73632, 45544, 34677, 23720, 23468, 19125, 17144, 16997, 15713, 14876],
)

# Tabular form of the same data (columns: 'word', 'count').
df2 = pd.DataFrame(data=data2)

In [19]:
# Two-column table trace built from df2: words on the left, counts on the right.
anime_table = go.Table(
    header={
        'values': ['Most Occured Words', 'Word Count'],
        'fill_color': '#ff9200',
        'font': {'color': 'white', 'size': 12},
    },
    cells={'values': [df2[column] for column in ('word', 'count')]},
)
fig = go.Figure(data=[anime_table])

# Fixed-size figure with only a top margin, leaving room for the title.
fig.update_layout(
    title='Word Count for Anime Reddit Submission',
    width=800,
    height=300,
    paper_bgcolor="White",
    margin={'l': 0, 'r': 0, 't': 50, 'b': 0},
)

# Export the table to an HTML file, then render it inline in the notebook.
fig.write_html("../../data/plots/word_count_table_anime.html")
fig.show()



Looking at the words people use the most, it seems like people on Reddit really enjoy chatting about their favorite anime and movies. Now, we're going to dig into the comments and pick out the names of the movies and anime that everyone is talking about the most. This way, we can find out which ones are super popular among the community.

MOST HIGHLY RATED MOVIES

In [1]:
import pandas as pd
import altair as alt

In [2]:
# Load the per-movie review sentiment scores from CSV. The file name suggests
# these are the 20 most positively reviewed titles -- TODO confirm.
positive_movies = pd.read_csv("../../data/csv/20_positive_movieReviews.csv")

In [50]:
# Preview the first five rows (five is the default for head()).
# NOTE(review): the saved output already lists `rank` and `rating`, which the
# following code cell creates -- the cells were probably run out of order;
# confirm with Restart Kernel -> Run All.
positive_movies.head()

Unnamed: 0,title,average_positive_score,average_negative_score,num_reviews,normalized_positive_score,normalized_negative_score,weighted_rating,rank,rating
0,Jim Allison: Breakthrough,1.0,8.531452e-22,48,48.0,4.095097e-20,0.980967,1,98.096718
1,Hava Nagila (The Movie),0.986757,0.01324329,116,114.463778,1.536221,0.978951,2,97.89506
2,Trifling Women,0.999248,0.0007515981,44,43.96693,0.03307032,0.978692,3,97.869215
3,Blinky Bill the Movie,0.998073,0.001927482,44,43.915191,0.08480921,0.977614,4,97.761426
4,Molly's Theory of Relativity,0.999998,1.942952e-06,40,39.999922,7.771807e-05,0.977505,5,97.750491


In [24]:
# 1-based rank taken from the row position (assumes the default 0..N-1 index
# that read_csv produces and that the file is already sorted -- TODO confirm).
positive_movies['rank'] = 1 + positive_movies.index
# Rescale the weighted rating by 100 so it reads as a percentage-style score.
positive_movies['rating'] = positive_movies['weighted_rating'].mul(100)

In [56]:
# Horizontal bar chart: one bar per movie title, bar length = rating.
# The x domain is restricted to 95-100 (the ratings shown above sit around
# 97-98), and sort='-x' orders the titles by descending x value.
positive_rating_axis = alt.X(
    'rating',
    title='Movie Rating',
    scale=alt.Scale(domain=[95, 100]),
)
positive_title_axis = alt.Y('title', title='Movie Title', sort='-x')

chart = (
    alt.Chart(positive_movies)
    .mark_bar(color='#006400', opacity=0.5)
    .encode(
        x=positive_rating_axis,
        y=positive_title_axis,
        tooltip=['title', 'rating'],
    )
    .properties(
        title='Movies with the Highest Positive Ratings',
        width=800,
        height=400,
    )
)


In [57]:
# Apply the font sizes in one chain: a larger chart title, then larger
# axis tick labels and axis titles. Adjust the numbers as needed.
chart = (
    chart
    .configure_title(fontSize=20)
    .configure_axis(labelFontSize=14, titleFontSize=16)
)

In [58]:
# Bare expression as the last line of the cell so the notebook renders the chart.
chart

In [49]:
# Load the per-movie review sentiment scores for the negatively reviewed
# titles (counterpart of the positive-reviews CSV loaded earlier).
negative_movies = pd.read_csv('../../data/csv/20_negative_movieReviews.csv')

In [51]:
# Rescale the weighted rating by 100 so it reads as a percentage-style score.
negative_movies['rating'] = negative_movies['weighted_rating'].mul(100)

In [77]:
# Display the full frame, including the `rating` column derived above
# (the saved output has 10 rows).
negative_movies

Unnamed: 0,title,average_positive_score,average_negative_score,num_reviews,normalized_positive_score,normalized_negative_score,weighted_rating,rating
0,Dinner With the President: A Nation's Journey,0.001345,0.998655,12,0.01614,11.98386,0.810848,81.084789
1,Fangs,0.000113,0.999887,8,0.000903,7.999097,0.749067,74.906688
2,Elephant Tales,0.002093,0.997907,8,0.016745,7.983255,0.747747,74.774676
3,Arisaka,0.00549,0.99451,8,0.043917,7.956083,0.745482,74.548237
4,The Phantom Planet,0.009694,0.990306,8,0.077551,7.922449,0.74268,74.267956
5,12 in a Box,0.002758,0.997242,6,0.016546,5.983454,0.697316,69.731595
6,Audrie & Daisy,0.19787,0.80213,16,3.16592,12.83408,0.691189,69.11893
7,A Tree of Life: The Pittsburgh Synagogue Shooting,0.112926,0.887074,9,1.016336,7.983664,0.690259,69.025924
8,Tears of Gaza,0.163763,0.836237,12,1.965156,10.034844,0.689034,68.903436
9,Moscow Zero,0.104067,0.895933,8,0.832539,7.167461,0.679764,67.976394


In [78]:
# Horizontal bar chart: one bar per movie title, bar length = rating.
# The x domain is restricted to 60-85 (the ratings shown above range from
# about 68 to 81), and sort='-x' orders the titles by descending x value.
negative_rating_axis = alt.X(
    'rating',
    title='Movie Rating',
    scale=alt.Scale(domain=[60, 85]),
)
negative_title_axis = alt.Y('title', title='Movie Title', sort='-x')

chart_neg = (
    alt.Chart(negative_movies)
    .mark_bar(color='#8B0000', opacity=0.5)
    .encode(
        x=negative_rating_axis,
        y=negative_title_axis,
        tooltip=['title', 'rating'],
    )
    .properties(
        title='Movies with the Highest Negative Ratings',
        width=800,
        height=400,
    )
)


In [79]:
# Apply the font sizes in one chain: a larger chart title, then larger
# axis tick labels and axis titles. Adjust the numbers as needed.
chart_neg = (
    chart_neg
    .configure_title(fontSize=20)
    .configure_axis(labelFontSize=14, titleFontSize=16)
)

In [80]:
# Bare expression as the last line of the cell so the notebook renders the chart.
chart_neg

  for col_name, dtype in df.dtypes.iteritems():
