In [1]:
# Notebook-wide configuration. These are plain module-level assignments; a
# `global` statement is only meaningful inside a function, so none is needed here.

# Display name of the studio; passed to the Visualizer as the producer name.
PRODUCTION_COMPANY = "Warner Bros."

# Ticker symbol handed to StockScraper when downloading the stock data.
COMPANY_CODE = "WBD"

**Helper Methods**

In [2]:
from datetime import datetime, timedelta


def get_dates_around(date_str, days=15):
    """
    Return the dates lying `days` days before and after a given date.

    Parameters:
    date_str (str): The centre date in 'YYYY-MM-DD' format.
    days (int): Half-width of the window in days. Defaults to 15, which
        reproduces the original fixed window.

    Returns:
    tuple: (date_before_str, date_after_str), both in 'YYYY-MM-DD' format.

    Raises:
    ValueError: If `date_str` does not match 'YYYY-MM-DD' or `days` is negative.
    """
    if days < 0:
        # A negative width would silently swap "before" and "after".
        raise ValueError("days must be non-negative")

    # Parse the input date string to a datetime object
    center = datetime.strptime(date_str, "%Y-%m-%d")
    half_window = timedelta(days=days)

    # Format both window edges back to 'YYYY-MM-DD'
    date_before_str = (center - half_window).strftime("%Y-%m-%d")
    date_after_str = (center + half_window).strftime("%Y-%m-%d")

    return date_before_str, date_after_str


def convert_date(date_str):
    """
    Re-render a 'YYYY-MM-DD' date string as 'YYYY Month DD'.

    Example: '2024-05-22' becomes '2024 May 22'. The month name comes from
    strftime's %B directive.
    """
    return datetime.strptime(date_str, "%Y-%m-%d").strftime("%Y %B %d")


# Rank movies from the highest to the lowest "vote_average"
def sort_movies_by_rating(movies_list):
    """
    Return a new list of the movies ordered by descending "vote_average".

    The input is not modified. Each movie must be a mapping that has a
    "vote_average" key; movies with equal ratings keep their input order.
    """
    ranked = list(movies_list)
    ranked.sort(key=lambda movie: movie["vote_average"], reverse=True)
    return ranked


from textblob import TextBlob


def get_sentiment(text):
    """
    Classify the sentiment of `text` with TextBlob.

    Returns a tuple (sentiment, polarity, subjectivity):
    - sentiment is "Positive" when polarity > 0, "Negative" when polarity < 0,
      and "Neutral" when polarity is exactly 0.
    - polarity is a float within the range [-1, 1], where -1 represents a negative
      sentiment and 1 represents a positive sentiment.
    - subjectivity is a float within the range [0, 1], where 0 represents a very
      objective text and 1 represents a very subjective text.
    """
    scores = TextBlob(text).sentiment
    polarity, subjectivity = scores.polarity, scores.subjectivity

    # The label depends only on the sign of the polarity score.
    label = "Positive" if polarity > 0 else "Negative" if polarity < 0 else "Neutral"

    return label, polarity, subjectivity


In [3]:
import json
import requests
from typing import Any
from datetime import datetime


class MovieScraper:
    """
    A class to scrape and filter the latest movies from a specified production company using The Movie Database (TMDb) API.

    Attributes:
    api_key (str): API key for accessing TMDb.
    BASE_URL (str): Root URL that every request path is appended to.
    REQUEST_TIMEOUT (int): Seconds to wait on each HTTP request before requests gives up.

    Methods:
    __init__(api_key: str) -> None:
        Initializes the MovieScraper with the provided API key.

    get_latest_movies(company_name: str) -> list:
        Retrieves the latest movies produced by the specified company. Returns a list of movies, or None if the company is not found or an error occurs.

    filter_movies_by_release_date(movies: list) -> list:
        Filters out movies that have not yet been released. Returns a list of released movies.

    __call__(company_name: str, get_released_only: bool = False) -> list:
        Fetches the latest movies from the specified company and optionally filters for released movies only. Raises an exception if no movies could be retrieved.
    """

    BASE_URL = "https://api.themoviedb.org/3"
    REQUEST_TIMEOUT = 10

    def __init__(self, api_key: str) -> None:
        """
        Initializes the MovieScraper with the provided API key.

        Parameters:
        api_key (str): The API key for accessing TMDb.
        """
        self.api_key = api_key

    def _get_json(self, endpoint: str, params: dict):
        """
        Issue a GET request against `endpoint` and return the decoded JSON body.

        The API key is added to `params`, and the whole query is passed through
        `params=` so that requests URL-encodes values such as company names
        containing spaces or '&'.

        Returns:
        The parsed JSON payload, or None (after printing the status code) when
        the response status is not 200.
        """
        query = {"api_key": self.api_key, **params}
        response = requests.get(
            f"{self.BASE_URL}/{endpoint}",
            params=query,
            timeout=self.REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            return None
        return response.json()

    def get_latest_movies(self, company_name: str) -> list:
        """
        Retrieves the latest movies produced by the specified company.

        The company is looked up by name first; the id of the first search hit
        is then used to list its movies sorted by descending release date.

        Parameters:
        company_name (str): The name of the production company.

        Returns:
        list: A list of dictionaries containing movie information, or None if the company is not found or an error occurs.
        """
        search_data = self._get_json("search/company", {"query": company_name})
        if search_data is None:
            return None

        companies = search_data.get("results")
        if not companies:
            print(f"No company found with name {company_name}")
            return None
        company_id = companies[0]["id"]

        movies_data = self._get_json(
            "discover/movie",
            {"with_companies": company_id, "sort_by": "release_date.desc"},
        )
        if movies_data is None:
            return None
        return movies_data["results"]

    @staticmethod
    def _parse_release_date(value):
        """Parse a 'YYYY-MM-DD' string into a date; return None if it is missing, empty or malformed."""
        if not value:
            return None
        try:
            return datetime.strptime(value, "%Y-%m-%d").date()
        except (TypeError, ValueError):
            return None

    def filter_movies_by_release_date(self, movies: list) -> list:
        """
        Filters out movies that have not yet been released.

        A movie is kept when its "release_date" is strictly before today's
        date. Movies whose "release_date" is missing, empty or not in
        'YYYY-MM-DD' format are skipped rather than raising, since their
        release status cannot be determined.

        Parameters:
        movies (list): A list of dictionaries containing movie information.

        Returns:
        list: A list of dictionaries containing only the movies that have been released.
        """
        current_date = datetime.now().date()
        released_movies = []
        for movie in movies:
            release_date = self._parse_release_date(movie.get("release_date"))
            if release_date is not None and release_date < current_date:
                released_movies.append(movie)
        return released_movies

    def __call__(self, company_name: str, get_released_only: bool = False) -> list:
        """
        Fetches the latest movies from the specified company and optionally filters for released movies only.

        Parameters:
        company_name (str): The name of the production company.
        get_released_only (bool): If True, filters the movies to include only those that have been released.

        Returns:
        list: A list of dictionaries containing movie information.

        Raises:
        Exception: If no movies could be retrieved. This covers an unknown
            company, a non-200 API response and an empty result list alike.
        """
        latest_movies = self.get_latest_movies(company_name=company_name)

        if not latest_movies:
            raise Exception("Production company not found!")

        if get_released_only:
            return self.filter_movies_by_release_date(movies=latest_movies)
        return latest_movies




In [4]:
import yfinance as yf

class StockScraper:
    """Callable that downloads price history for a single ticker through yfinance."""

    def __init__(self, yf_ticker: str) -> None:
        """Remember the ticker symbol to pass to `yf.download`."""
        self.yf_ticker = yf_ticker

    def __call__(self, start_date: str, end_date: str):
        """
        Download the ticker's data between `start_date` and `end_date`.

        Returns the Open, High, Low, Close and Volume columns with the index
        moved into a regular column by `reset_index`.
        NOTE(review): downstream code reads that column as "Date"; confirm
        the index name yfinance produces.
        """
        history = yf.download(self.yf_ticker, start=start_date, end=end_date)

        # Keep only the price/volume columns, then turn the index into a column
        return history[["Open", "High", "Low", "Close", "Volume"]].reset_index()

In [5]:
import pandas as pd
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from sklearn.linear_model import LinearRegression


class Visualizer:
    """
    Builds Plotly figures of a stock's prices around a movie's release date.

    The input frame must provide the columns "Date", "Open", "High", "Low",
    "Close" and "Volume". Every plotting method returns the figure without
    showing it.
    """

    def __init__(
        self,
        data: pd.DataFrame,
        producer_name: str,
        movie_name: str,
        start_date: str,
        end_date: str,
        fig_height: int = 800,
    ) -> None:
        """
        Parameters:
        data (pd.DataFrame): Price data; its "Date" column is converted to datetime.
        producer_name (str): Company name shown in the figure title.
        movie_name (str): Movie title shown in the figure titles.
        start_date (str): Start of the plotted period, 'YYYY-MM-DD'.
        end_date (str): End of the plotted period, 'YYYY-MM-DD'.
        fig_height (int): Height applied to every figure's layout.
        """
        # Work on a copy so that converting "Date" does not modify the caller's frame.
        self.df = data.copy()
        self.df["Date"] = pd.to_datetime(self.df["Date"])

        self.fig_height = fig_height

        self.producer_name = producer_name

        self.movie_name = movie_name

        self.start_date = start_date
        self.end_date = end_date

    def _price_trace(self, column: str):
        """Return a lines+markers Scatter trace of `column` against "Date", named after the column."""
        return go.Scatter(
            x=self.df["Date"],
            y=self.df[column],
            name=column,
            mode="lines+markers",
        )

    def plot_o_h_l_c_v(self):
        """This method plots the open, high, low, close prices as lines on the secondary y-axis and the volume as grey bars on the primary y-axis."""

        fig = make_subplots(rows=1, cols=1, specs=[[{"secondary_y": True}]])

        # Price lines share the secondary y-axis
        for column in ("Open", "High", "Low", "Close"):
            fig.add_trace(self._price_trace(column), secondary_y=True)

        # Volume goes on the primary y-axis as a bar chart
        fig.add_trace(
            go.Bar(
                x=self.df["Date"],
                y=self.df["Volume"],
                name="Volume",
                marker_color="grey",
            ),
            secondary_y=False,
        )

        # Update layout for a cleaner look. The y-axis titles are set below
        # through update_yaxes, which ties each title to the axis its traces use.
        fig.update_layout(
            title=f"{self.producer_name} Stock Prices and Volume between {convert_date(self.start_date)} and {convert_date(self.end_date)} when movie {self.movie_name} was relased.",
            xaxis_title="Date",
            legend_title="Data Type",
            template="plotly_white",
            height=self.fig_height,
        )

        # Set y-axes titles
        fig.update_yaxes(title_text="Price", secondary_y=True)
        fig.update_yaxes(title_text="Volume", secondary_y=False)

        return fig

    def plot_relase_date(self, release_date):
        """
        Plot the Open and Close prices with a dashed green vertical line at `release_date`.

        Parameters:
        release_date (str): Release date in 'YYYY-MM-DD' format.
        """

        # Create a subplot
        fig = make_subplots(rows=1, cols=1)

        # Add traces for Open and Close
        fig.add_trace(self._price_trace("Open"))
        fig.add_trace(self._price_trace("Close"))

        # Mark the movie's release date
        fig.add_vline(
            x=release_date, line_width=3, line_dash="dash", line_color="green"
        )

        # Update layout for a cleaner look
        fig.update_layout(
            title=f"Stock Price Trends with Highlight on movie relase date : {convert_date(release_date)}",
            xaxis_title="Date",
            yaxis_title="Price",
            legend_title="Price Type",
            template="plotly_white",
            height=self.fig_height,
        )

        return fig

    # Correctly spelled alias; the original method name is kept for existing callers.
    plot_release_date = plot_relase_date

    def plot_overall_trends(self, release_date):
        """
        Plot the Close price together with a linear trend fitted to the rows dated after `release_date`.

        The trend is a LinearRegression of Close on the number of days elapsed
        since the first post-release row. When no row is dated after the
        release date there is nothing to fit, so the trend line is omitted
        instead of raising.

        Parameters:
        release_date (str): Release date in 'YYYY-MM-DD' format.
        """

        after_release = self.df[self.df["Date"] > release_date]

        # Create a subplot
        fig = make_subplots(rows=1, cols=1)

        # Add the actual data points for 'Close' price
        fig.add_trace(self._price_trace("Close"))

        if not after_release.empty:
            # Prepare the data for regression: days since the first post-release row
            days_elapsed = after_release["Date"] - after_release["Date"].min()
            X = days_elapsed.dt.days.to_numpy().reshape(-1, 1)
            y = after_release["Close"].to_numpy()

            # Perform linear regression and evaluate it on the same days
            model = LinearRegression()
            model.fit(X, y)
            trend_line = model.predict(X)

            # Add the trend line
            fig.add_trace(
                go.Scatter(
                    x=after_release["Date"],
                    y=trend_line,
                    mode="lines",
                    name="Trend after movie relase.",
                )
            )

        # Mark the movie's release date
        fig.add_vline(
            x=release_date, line_width=3, line_dash="dash", line_color="green"
        )

        # Update layout for a cleaner look
        fig.update_layout(
            title=f"Stock Price Trends with Trend Line after the relase date {convert_date(date_str=release_date)} of {self.movie_name} ",
            xaxis_title="Date",
            yaxis_title="Price",
            legend_title="Price Type",
            template="plotly_white",
            height=self.fig_height,
        )

        return fig


In [6]:
# Read the TMDb API key from the environment rather than committing it to the
# notebook. NOTE(review): the key that used to be hard-coded here has been
# exposed and should be revoked.
import os

tmdb_api_key = os.environ.get("TMDB_API_KEY")
if not tmdb_api_key:
    raise RuntimeError(
        "Set the TMDB_API_KEY environment variable to your TMDb API key."
    )

# initialize the movie scraper object
movie_getter = MovieScraper(api_key=tmdb_api_key)

# get the latest movies that were released before today's date
movies = movie_getter(company_name=PRODUCTION_COMPANY, get_released_only=True)

# sort the movies by rating, highest first
movies_sorted = sort_movies_by_rating(movies_list=movies)

In [7]:
# Inspect the rating-sorted movie list; as the cell's last expression it is displayed as output.
movies_sorted

[{'adult': False,
  'backdrop_path': '/shrwC6U8Bkst9T9J7fr1A50n6x6.jpg',
  'genre_ids': [28, 12, 878],
  'id': 786892,
  'original_language': 'en',
  'original_title': 'Furiosa: A Mad Max Saga',
  'overview': 'As the world fell, young Furiosa is snatched from the Green Place of Many Mothers and falls into the hands of a great Biker Horde led by the Warlord Dementus. Sweeping through the Wasteland they come across the Citadel presided over by The Immortan Joe. While the two Tyrants war for dominance, Furiosa must survive many trials as she puts together the means to find her way home.',
  'popularity': 986.382,
  'poster_path': '/iADOJ8Zymht2JPMoy3R7xceZprc.jpg',
  'release_date': '2024-05-22',
  'title': 'Furiosa: A Mad Max Saga',
  'video': False,
  'vote_average': 7.7,
  'vote_count': 821},
 {'adult': False,
  'backdrop_path': '/yyFc8Iclt2jxPmLztbP617xXllT.jpg',
  'genre_ids': [35, 10751, 14],
  'id': 787699,
  'original_language': 'en',
  'original_title': 'Wonka',
  'overview': 'Wi

**Now, let's iterate over each of the movies one by one:** 

In [9]:
# The ticker is the same for every movie, so the scraper is built once
# instead of on every iteration.
scraper = StockScraper(yf_ticker=COMPANY_CODE)

# Analyse the three highest-rated movies
for item in movies_sorted[:3]:

    is_adult_movie = item["adult"]
    movie_name = item["original_title"]
    overview = item["overview"]
    release_date = item["release_date"]
    rating = item["vote_average"]
    pop = item["popularity"]

    print("*" * 20, movie_name, "*" * 20)

    print(f"R rate movie: {is_adult_movie}")
    print(f"Overall Rating : {rating}  Popularity : {pop}")
    print(f"Movie Overview: {overview}\n\n")

    # Sentiment of the overview text
    print("Let's perform sentiment analysis of the movie:")
    print("-" * 50)
    sentiment, polarity, subjectivity = get_sentiment(text=overview)
    print(f"Sentiment : {sentiment}")
    print(f"Polarity : {polarity}")
    print(f"Subjectivity : {subjectivity}")
    print("\n\n")

    print(f"Relase Date: {release_date}")
    # get the time window around the release date to check the stock market
    date_before_str, date_after_str = get_dates_around(date_str=release_date)
    print(
        f"For the movie {movie_name}, we will be looking stock market between two time period : {date_before_str}--{date_after_str}"
    )

    print("Downloading stock data: ")
    scraped_data = scraper(start_date=date_before_str, end_date=date_after_str)

    # initialize the data visualizer for this movie's window
    visualizer = Visualizer(
        data=scraped_data,
        producer_name=PRODUCTION_COMPANY,
        movie_name=movie_name,
        start_date=date_before_str,
        end_date=date_after_str,
    )

    # open/high/low/close lines with volume bars
    all_plot = visualizer.plot_o_h_l_c_v()
    all_plot.show()

    # open/close lines with the release date marked
    plot_relase_date = visualizer.plot_relase_date(release_date=release_date)
    plot_relase_date.show()

    # close price with the release date marked and a post-release regression line
    reg_plot = visualizer.plot_overall_trends(release_date=release_date)
    reg_plot.show()


[*********************100%%**********************]  1 of 1 completed

******************** Furiosa: A Mad Max Saga ********************
R rate movie: False
Overall Rating : 7.7  Popularity : 986.382
Movie Overview: As the world fell, young Furiosa is snatched from the Green Place of Many Mothers and falls into the hands of a great Biker Horde led by the Warlord Dementus. Sweeping through the Wasteland they come across the Citadel presided over by The Immortan Joe. While the two Tyrants war for dominance, Furiosa must survive many trials as she puts together the means to find her way home.


Let's perform sentiment analysis of the movie:
--------------------------------------------------
Sentiment : Positive
Polarity : 0.34
Subjectivity : 0.49000000000000005



Relase Date: 2024-05-22
For the movie Furiosa: A Mad Max Saga, we will be looking stock market between two time period : 2024-05-07--2024-06-06
Downloading stock data: 





[*********************100%%**********************]  1 of 1 completed

******************** Wonka ********************
R rate movie: False
Overall Rating : 7.165  Popularity : 286.868
Movie Overview: Willy Wonka – chock-full of ideas and determined to change the world one delectable bite at a time – is proof that the best things in life begin with a dream, and if you’re lucky enough to meet Willy Wonka, anything is possible.


Let's perform sentiment analysis of the movie:
--------------------------------------------------
Sentiment : Positive
Polarity : 0.3333333333333333
Subjectivity : 0.6583333333333333



Relase Date: 2023-12-06
For the movie Wonka, we will be looking stock market between two time period : 2023-11-21--2023-12-21
Downloading stock data: 





[*********************100%%**********************]  1 of 1 completed

******************** Barbie ********************
R rate movie: False
Overall Rating : 7.053  Popularity : 226.105
Movie Overview: Barbie and Ken are having the time of their lives in the colorful and seemingly perfect world of Barbie Land. However, when they get a chance to go to the real world, they soon discover the joys and perils of living among humans.


Let's perform sentiment analysis of the movie:
--------------------------------------------------
Sentiment : Positive
Polarity : 0.5
Subjectivity : 0.5666666666666667



Relase Date: 2023-07-19
For the movie Barbie, we will be looking stock market between two time period : 2023-07-04--2023-08-03
Downloading stock data: 



