# IMDb 2024 Movies: Data Cleaning, Visualization, Dashboard, and Scraping

In [6]:
pip install selenium pandas matplotlib seaborn streamlit


Collecting selenium
  Downloading selenium-4.34.2-py3-none-any.whl.metadata (7.5 kB)
Collecting streamlit
  Downloading streamlit-1.46.1-py3-none-any.whl.metadata (9.0 kB)
Collecting urllib3~=2.5.0 (from urllib3[socks]~=2.5.0->selenium)
  Downloading urllib3-2.5.0-py3-none-any.whl.metadata (6.5 kB)
Collecting trio~=0.30.0 (from selenium)
  Downloading trio-0.30.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.12.2 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting outcome (from trio~=0.30.0->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 

In [None]:
| Title                             | Year | Rating | Genre                        |
| --------------------------------- | ---- | ------ | ---------------------------- |
| Dune: Part Two                    | 2024 | 8.8    | Action, Adventure, Sci-Fi    |
| The Fall Guy                      | 2024 | 7.2    | Action, Comedy               |
| Furiosa: A Mad Max Saga           | 2024 | 7.6    | Action, Adventure, Sci-Fi    |
| Inside Out 2                      | 2024 | 8.3    | Animation, Adventure, Comedy |
| Civil War                         | 2024 | 7.1    | Action, Drama, Thriller      |
| Kingdom of the Planet of the Apes | 2024 | 7.5    | Action, Adventure, Sci-Fi    |
| Challengers                       | 2024 | 7.8    | Drama, Romance, Sport        |


In [None]:
# data_analysis.py
import pandas as pd


def _split_genres(value):
    """Split one comma-separated genre string into a list of stripped names.

    Non-string cells (missing values) are returned unchanged so they stay
    missing in the result. Empty tokens such as the one after a trailing
    comma are dropped.
    """
    if not isinstance(value, str):
        return value
    return [part.strip() for part in value.split(',') if part.strip()]


def clean_movies(raw_df):
    """Return a cleaned copy of the raw movie table.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        Table with at least the columns ``Rating``, ``Year`` and ``Genre``.

    Returns
    -------
    pandas.DataFrame
        A new frame (``raw_df`` is not modified) in which

        * ``Rating`` is numeric; "N/A" and any other unparseable text become NaN,
        * ``Year`` is a nullable integer taken from the first run of four
          digits in the cell; cells without one become ``<NA>``,
        * ``Genre`` is a list of whitespace-stripped genre names.
    """
    cleaned = raw_df.copy()

    # errors='coerce' maps every non-numeric rating to NaN instead of raising.
    cleaned['Rating'] = pd.to_numeric(cleaned['Rating'], errors='coerce')

    # read_csv may already have parsed Year as a number, on which the .str
    # accessor raises; go through str so both numeric and text years work.
    year_digits = cleaned['Year'].astype(str).str.extract(r'(\d{4})', expand=False)
    # Nullable Int64 keeps whole-number years even when some rows have none.
    cleaned['Year'] = pd.to_numeric(year_digits, errors='coerce').astype('Int64')

    # Strip here so the saved lists contain 'Adventure', not ' Adventure'.
    cleaned['Genre'] = cleaned['Genre'].apply(_split_genres)

    return cleaned


# True both in a notebook kernel and when the file is run as a script; the
# guard only keeps the file I/O from running if this module is imported.
if __name__ == "__main__":
    df = clean_movies(pd.read_csv("imdb_2024.csv"))

    # Explode genres for frequency: one row per (movie, genre) pair
    genre_df = df.explode('Genre')

    # Save cleaned
    df.to_csv("imdb_2024_cleaned.csv", index=False)
    print("Cleaned data saved.")


In [None]:
# visualization.py
import ast

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


def _parse_genres(value):
    """Turn one ``Genre`` cell read from CSV into a list of genre names.

    A list written by ``to_csv`` comes back from ``read_csv`` as its text
    repr (for example ``"['Action', ' Sci-Fi']"``), not as a list, so it is
    parsed with ``ast.literal_eval``. A plain comma-separated string is
    accepted as well. Missing or non-string cells yield an empty list.
    Names are whitespace-stripped and empty names are dropped.
    """
    if not isinstance(value, str):
        return []
    text = value.strip()
    items = None
    if text.startswith('['):
        try:
            items = ast.literal_eval(text)
        except (ValueError, SyntaxError):
            items = None  # malformed repr: fall back to comma splitting below
    if not isinstance(items, (list, tuple)):
        items = text.strip('[]').split(',')
    names = (str(item).strip(" '\"") for item in items)
    return [name for name in names if name]


df = pd.read_csv("imdb_2024_cleaned.csv")

# Rating histogram
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['Rating'].dropna(), bins=10, kde=True, ax=ax)
ax.set_title("Distribution of IMDB Ratings (2024)")
ax.set_xlabel("Rating")
ax.set_ylabel("Frequency")
fig.savefig("rating_distribution.png")
plt.show()

# Genre count
# explode() only expands list-like cells; on the raw strings read from CSV it
# leaves every row as is, so the cells are parsed into lists first.
genre_df = df.assign(Genre=df['Genre'].apply(_parse_genres)).explode('Genre')
genre_counts = genre_df['Genre'].value_counts()

fig, ax = plt.subplots(figsize=(12, 6))
genre_counts.plot(kind='bar', color='skyblue', ax=ax)
ax.set_title("Genre Frequency (Top 50 Movies of 2024)")
ax.set_xlabel("Genre")
ax.set_ylabel("Count")
ax.tick_params(axis='x', rotation=45)
fig.tight_layout()
fig.savefig("genre_frequency.png")
plt.show()


In [None]:
# streamlit_app.py
import ast

import streamlit as st
import pandas as pd


def _parse_genres(value):
    """Turn one ``Genre`` cell read from CSV into a list of genre names.

    The cell is expected to hold the text repr of a Python list (for example
    ``"['Action', ' Sci-Fi']"``). It is parsed with ``ast.literal_eval``,
    which only accepts literals, instead of ``eval``, which would execute any
    expression found in the file. A plain comma-separated string is accepted
    as well. Missing or non-string cells yield an empty list. Names are
    whitespace-stripped and empty names are dropped.
    """
    if not isinstance(value, str):
        return []
    text = value.strip()
    items = None
    if text.startswith('['):
        try:
            items = ast.literal_eval(text)
        except (ValueError, SyntaxError):
            items = None  # malformed repr: fall back to comma splitting below
    if not isinstance(items, (list, tuple)):
        items = text.strip('[]').split(',')
    names = (str(item).strip(" '\"") for item in items)
    return [name for name in names if name]


st.title("IMDB 2024 Movies Dashboard")
df = pd.read_csv("imdb_2024_cleaned.csv")

# Parse every cell once; the same stripped names feed both the selector
# options and the row filter, so a selected genre always matches its rows.
genres_per_movie = df['Genre'].apply(_parse_genres)
genre_list = sorted({g for genres in genres_per_movie for g in genres})

selected_genre = st.selectbox("Select Genre", genre_list)

# Filter by genre
has_selected_genre = genres_per_movie.apply(lambda genres: selected_genre in genres).astype(bool)
filtered_df = df[has_selected_genre]

st.subheader(f"Movies in Genre: {selected_genre}")
st.dataframe(filtered_df[['Title', 'Rating', 'Year']].sort_values(by='Rating', ascending=False))

st.subheader(" Rating Distribution")
st.bar_chart(filtered_df['Rating'].value_counts().sort_index())

st.subheader(" Year Distribution")
st.bar_chart(filtered_df['Year'].value_counts().sort_index())


In [None]:
| Title                   | Rating | Year |
| ----------------------- | ------ | ---- |
| Dune: Part Two          | 8.8    | 2024 |
| Furiosa: A Mad Max Saga | 7.6    | 2024 |
| Civil War               | 7.1    | 2024 |


In [10]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [20]:
# NOTE(review): `url` is not assigned in any cell visible here — confirm it is
# defined before this cell runs on a fresh kernel.
# The timeout bounds how long the request may hang; raise_for_status() turns an
# HTTP error response into an exception so an error page is not parsed below.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

In [21]:
# Collect every <div> whose class attribute is exactly "lister-item mode-advanced".
# find_all is the current name of the deprecated findAll alias.
movie_data = soup.find_all('div', attrs={'class': 'lister-item mode-advanced'})

  movie_data = soup.findAll('div',attrs = {'class':'lister-item mode-advanced'})


In [22]:
# Parallel result lists: position i in every list belongs to the i-th entry of
# movie_data. Every loop iteration appends exactly one value to each list.
movie_name = []
year = []
time = []  # NOTE: this name would shadow the stdlib `time` module if it is imported in the same kernel
genre = []
rating = []
metascore = []
director = []
stars = []
votes = []
gross = []


def _find_text(parent, tag_name, attrs):
    """Return the text of the first ``tag_name`` element under ``parent`` matching ``attrs``.

    Returns ``None`` when ``parent`` is ``None`` or nothing matches, so an
    entry that lacks a field yields a missing value instead of raising
    ``AttributeError`` on ``None.text`` and aborting the whole loop.
    """
    if parent is None:
        return None
    found = parent.find(tag_name, attrs=attrs)
    return None if found is None else found.text


for data in movie_data:
    name = data.h3.a.text
    movie_name.append(name)

    year1 = _find_text(data.h3, 'span', {'class': 'lister-item-year text-muted unbold'})
    year.append(None if year1 is None else year1.replace('(', '').replace(')', ''))

    time1 = _find_text(data.p, 'span', {'class': 'runtime'})
    time.append(None if time1 is None else time1.replace(' min', ''))

    genre1 = _find_text(data.p, 'span', {'class': 'genre'})
    genre.append(None if genre1 is None else genre1.replace('\n', '').replace(' ', ''))

    rating1 = _find_text(data, 'div', {'class': 'inline-block ratings-imdb-rating'})
    rating.append(None if rating1 is None else rating1.replace('\n', ''))

    # '###' is the existing placeholder for an entry without a metascore.
    meta = _find_text(data, 'span', {'class': 'metascore'})
    metascore.append('###' if meta is None else meta.replace(' ', ''))

    # The paragraph with an empty class attribute is read for both the director
    # (its first link) and the stars, so it is looked up once.
    crew = data.find('p', attrs={'class': ''})
    director_link = crew.a if crew is not None else None
    director.append(None if director_link is None else director_link.text)

    if crew is None:
        stars.append(None)
    else:
        # Drop newlines and spaces, treat '|' like ',', discard the first
        # comma-separated piece, and remove the 'Stars:' label from the rest.
        stars1 = crew.text.replace('\n', '').replace(' ', '').replace('|', ',').split(',')[1:]
        stars1 = ','.join(stars1).replace('Stars:', '')
        stars.append(stars1)

    values = data.find_all('span', attrs={'name': 'nv'})

    votes1 = values[0].text.replace(',', '') if values else None
    votes.append(votes1)

    # '!!!!!!!!!' is the existing placeholder for an entry without a gross figure.
    # NOTE(review): gross is taken only when exactly three "nv" spans exist —
    # confirm this matches the page layout (entries with two spans get the placeholder).
    gross1 = values[1].text.replace('$', '').replace('M', '') if len(values) == 3 else '!!!!!!!!!'
    gross.append(gross1)
