In [1]:
global PRODUCTION_COMPANY
PRODUCTION_COMPANY = "Warner Bros."

global COMPANY_CODE
COMPANY_CODE = "WBD"

# the number of movies that we want to analyze, since this an automated script we can select arbitary number of movies to analyze.
N = 5

**Helper Methods**

In [2]:
from datetime import datetime, timedelta


def get_dates_around(date_str):
    """
    Given a date string in the format 'YYYY-MM-DD', calculate the dates 15 days before and after the given date.

    Args:
        date_str (str): The input date string in the format 'YYYY-MM-DD'.

    Returns:
        tuple: A tuple containing two date strings:
               - The date 15 days before the given date.
               - The date 15 days after the given date.
    """
    # Parse the input date string to a datetime object
    date = datetime.strptime(date_str, "%Y-%m-%d")

    # Calculate 15 days before and after the given date
    date_before = date - timedelta(days=2)
    date_after = date + timedelta(days=2)

    # Format the dates back to string in 'YYYY-MM-DD' format
    date_before_str = date_before.strftime("%Y-%m-%d")
    date_after_str = date_after.strftime("%Y-%m-%d")

    return date_before_str, date_after_str


def convert_date(date_str):
    """
    Convert a date string from 'YYYY-MM-DD' format to 'YYYY Month DD' format.

    Args:
        date_str (str): The input date string in the format 'YYYY-MM-DD'.

    Returns:
        str: The date string formatted as 'YYYY Month DD'.
    """
    # Parse the input date string to a datetime object
    date = datetime.strptime(date_str, "%Y-%m-%d")

    # Format the datetime object to the desired format 'YYYY Month DD'
    formatted_date = date.strftime("%Y %B %d")

    return formatted_date


def sort_movies_by_rating(movies_list):
    """
    Sort a list of movies by their vote average in descending order.

    Args:
        movies_list (list): A list of dictionaries, where each dictionary represents a movie 
                            and contains a 'vote_average' key.

    Returns:
        list: The list of movies sorted by 'vote_average' in descending order.
    """
    # Sort the list of movies based on the 'vote_average' key in descending order
    return sorted(movies_list, key=lambda x: x["vote_average"], reverse=True)


from textblob import TextBlob


def get_sentiment(text):
    """
    Returns the polarity and subjectivity scores for the given text.
    Polarity score is a float within the range [-1, 1], where -1 represents a negative sentiment, and 1 represents a positive sentiment.
    Subjectivity score is a float within the range [0, 1], where 0 represents a very objective text, and 1 represents a very subjective text.
    """
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity
    subjectivity = blob.sentiment.subjectivity

    if polarity > 0:
        sentiment = "Positive"
    elif polarity < 0:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"

    return sentiment, polarity, subjectivity


In [3]:
import json
import requests
from typing import Any
from datetime import datetime


class MovieScraper:
    """
    A class to scrape and filter the latest movies from a specified production company using The Movie Database (TMDb) API.

    Attributes:
    api_key (str): API key for accessing TMDb.

    Methods:
    __init__(api_key: str) -> None:
        Initializes the MovieScraper with the provided API key.

    get_latest_movies(company_name: str) -> list:
        Retrieves the latest movies produced by the specified company. Returns a list of movies, or None if the company is not found or an error occurs.

    filter_movies_by_release_date(movies: list) -> list:
        Filters out movies that have not yet been released. Returns a list of released movies.

    __call__(company_name: str, get_released_only: bool = False) -> dict:
        Fetches the latest movies from the specified company and optionally filters for released movies only. Raises an exception if the production company is not found.
    """

    def __init__(self, api_key: str) -> None:
        """
        Initializes the MovieScraper with the provided API key.

        Parameters:
        api_key (str): The API key for accessing TMDb.
        """
        self.api_key = api_key

    def get_latest_movies(self, company_name: str) -> list:
        """
        Retrieves the latest movies produced by the specified company.

        Parameters:
        company_name (str): The name of the production company.

        Returns:
        list: A list of dictionaries containing movie information, or None if the company is not found or an error occurs.
        """
        search_url = f"https://api.themoviedb.org/3/search/company?api_key={self.api_key}&query={company_name}"
        search_response = requests.get(search_url)

        if search_response.status_code == 200:
            search_data = search_response.json()
            if search_data["results"]:
                company_id = search_data["results"][0]["id"]
            else:
                print(f"No company found with name {company_name}")
                return None
        else:
            print(f"Error: {search_response.status_code}")
            return None

        movies_url = f"https://api.themoviedb.org/3/discover/movie?api_key={self.api_key}&with_companies={company_id}&sort_by=release_date.desc"
        movies_response = requests.get(movies_url)

        if movies_response.status_code == 200:
            movies_data = movies_response.json()
            return movies_data["results"]
        else:
            print(f"Error: {movies_response.status_code}")
            return None

    def filter_movies_by_release_date(self, movies: list) -> list:
        """
        Filters out movies that have not yet been released.

        Parameters:
        movies (list): A list of dictionaries containing movie information.

        Returns:
        list: A list of dictionaries containing only the movies that have been released.
        """
        current_date = datetime.now().date()
        filtered_movies = [
            movie
            for movie in movies
            if datetime.strptime(movie["release_date"], "%Y-%m-%d").date()
            < current_date
        ]
        return filtered_movies

    def __call__(self, company_name: str, get_released_only: bool = False) -> dict:
        """
        Fetches the latest movies from the specified company and optionally filters for released movies only.

        Parameters:
        company_name (str): The name of the production company.
        get_released_only (bool): If True, filters the movies to include only those that have been released.

        Returns:
        dict: A list of dictionaries containing movie information. Raises an exception if the production company is not found.
        """
        latest_movies = self.get_latest_movies(company_name=company_name)

        if latest_movies:
            if get_released_only:
                return self.filter_movies_by_release_date(movies=latest_movies)
            return latest_movies
        else:
            raise Exception("Production company not found!")




In [4]:
import yfinance as yf

class StockScraper:
    """
    A class to scrape stock data using the yfinance library.

    Attributes:
        yf_ticker (str): The ticker symbol of the stock to be scraped.
    """

    def __init__(self, yf_ticker: str) -> None:
        """
        Initialize the StockScraper with a ticker symbol.

        Args:
            yf_ticker (str): The ticker symbol of the stock.
        """
        self.yf_ticker = yf_ticker

    def __call__(self, start_date: str, end_date: str):
        """
        Download and return stock data for the given date range.

        Args:
            start_date (str): The start date for the data in the format 'YYYY-MM-DD'.
            end_date (str): The end date for the data in the format 'YYYY-MM-DD'.

        Returns:
            pandas.DataFrame: A DataFrame containing the stock data with columns 'Open', 'High', 'Low', 'Close', and 'Volume'.
        """
        # Download the stock data from Yahoo Finance
        data = yf.download(self.yf_ticker, start=start_date, end=end_date)
        
        # Extract relevant columns
        data = data[['Open', 'High', 'Low', 'Close', 'Volume']]
        
        # Reset the index to move the date from index to a column
        data.reset_index(inplace=True)
        
        return data


In [5]:
import pandas as pd
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from sklearn.linear_model import LinearRegression

class Visualizer:
    """
    A class to visualize stock data with various plotting methods.

    Attributes:
        data (pd.DataFrame): The stock data to visualize.
        producer_name (str): The name of the producer.
        movie_name (str): The name of the movie.
        start_date (str): The start date for the data in the format 'YYYY-MM-DD'.
        end_date (str): The end date for the data in the format 'YYYY-MM-DD'.
        fig_height (int): The height of the figure for the plots.
    """

    def __init__(
        self,
        data: pd.DataFrame,
        producer_name: str,
        movie_name: str,
        start_date: str,
        end_date: str,
        fig_height: int = 800,
    ) -> None:
        """
        Initialize the Visualizer with stock data and metadata.

        Args:
            data (pd.DataFrame): The stock data.
            producer_name (str): The name of the producer.
            movie_name (str): The name of the movie.
            start_date (str): The start date for the data in the format 'YYYY-MM-DD'.
            end_date (str): The end date for the data in the format 'YYYY-MM-DD'.
            fig_height (int): The height of the figure for the plots (default is 800).
        """
        self.df = data
        self.df["Date"] = pd.to_datetime(self.df["Date"])

        self.fig_height = fig_height

        self.producer_name = producer_name

        self.movie_name = movie_name

        self.start_date = start_date
        self.end_date = end_date

    def plot_o_h_l_c_v(self):
        """
        Plot the open, high, low, close prices and volume of the stock data.

        Returns:
            fig (plotly.graph_objs._figure.Figure): The Plotly figure object.
        """
        fig = make_subplots(rows=1, cols=1, specs=[[{"secondary_y": True}]])

        # Add traces for Open, High, Low, Close
        fig.add_trace(
            go.Scatter(
                x=self.df["Date"], y=self.df["Open"], name="Open", mode="lines+markers"
            ),
            secondary_y=True,
        )
        fig.add_trace(
            go.Scatter(
                x=self.df["Date"], y=self.df["High"], name="High", mode="lines+markers"
            ),
            secondary_y=True,
        )
        fig.add_trace(
            go.Scatter(
                x=self.df["Date"], y=self.df["Low"], name="Low", mode="lines+markers"
            ),
            secondary_y=True,
        )
        fig.add_trace(
            go.Scatter(
                x=self.df["Date"],
                y=self.df["Close"],
                name="Close",
                mode="lines+markers",
            ),
            secondary_y=True,
        )

        # Add trace for Volume using a bar chart
        fig.add_trace(
            go.Bar(
                x=self.df["Date"],
                y=self.df["Volume"],
                name="Volume",
                marker_color="grey",
            ),
            secondary_y=False,
        )

        # Update layout for a cleaner look
        fig.update_layout(
            title=f"{self.producer_name} Stock Prices and Volume between {str(convert_date(self.start_date))} and {str(convert_date(self.end_date))} when movie {self.movie_name} was released.",
            xaxis_title="Date",
            yaxis_title="Price",
            yaxis2_title="Volume",
            legend_title="Data Type",
            template="plotly_white",
        )

        # Set y-axes titles
        fig.update_yaxes(title_text="Price", secondary_y=True)
        fig.update_yaxes(title_text="Volume", secondary_y=False)

        fig.update_layout(height=self.fig_height)

        return fig

    def plot_release_date(self, release_date):
        """
        Plot the stock prices and highlight the release date of the movie.

        Args:
            release_date (str): The release date of the movie in the format 'YYYY-MM-DD'.

        Returns:
            fig (plotly.graph_objs._figure.Figure): The Plotly figure object.
        """
        # Create a subplot
        fig = make_subplots(rows=1, cols=1)

        # Add traces for Open, High, Low, Close
        fig.add_trace(
            go.Scatter(
                x=self.df["Date"], y=self.df["Open"], name="Open", mode="lines+markers"
            )
        )
        fig.add_trace(
            go.Scatter(
                x=self.df["Date"],
                y=self.df["Close"],
                name="Close",
                mode="lines+markers",
            )
        )

        # Highlight the release date
        highlight_date = release_date
        fig.add_vline(
            x=highlight_date, line_width=3, line_dash="dash", line_color="green"
        )

        # Update layout for a cleaner look
        fig.update_layout(
            title=f"Stock Price Trends with Highlight on movie release date: {str(convert_date(release_date))}",
            xaxis_title="Date",
            yaxis_title="Price",
            legend_title="Price Type",
            template="plotly_white",
            height=self.fig_height,
        )

        return fig

    def plot_overall_trends(self, release_date):
        """
        Plot the overall stock price trends and a trend line after the release date of the movie.

        Args:
            release_date (str): The release date of the movie in the format 'YYYY-MM-DD'.

        Returns:
            fig (plotly.graph_objs._figure.Figure): The Plotly figure object.
        """
        after_release = self.df[self.df["Date"] > release_date]

        # Prepare the data for regression
        X = (after_release["Date"] - after_release["Date"].min()).dt.days.values.reshape(
            -1, 1
        )
        y = after_release["Close"].values

        # Perform linear regression
        model = LinearRegression()
        model.fit(X, y)

        # Predict the trend line
        trend_line = model.predict(X)

        # Create a subplot
        fig = make_subplots(rows=1, cols=1)

        # Add the actual data points for 'Close' price
        fig.add_trace(
            go.Scatter(
                x=self.df["Date"],
                y=self.df["Close"],
                mode="lines+markers",
                name="Close",
            )
        )

        # Add the trend line
        fig.add_trace(
            go.Scatter(
                x=after_release["Date"],
                y=trend_line,
                mode="lines",
                name=f"Trend after movie release.",
            )
        )

        # Highlight the release date
        fig.add_vline(
            x=release_date, line_width=3, line_dash="dash", line_color="green"
        )

        # Update layout for a cleaner look
        fig.update_layout(
            title=f"Stock Price Trends with Trend Line after the release date {str(convert_date(date_str=release_date))} of {self.movie_name}",
            xaxis_title="Date",
            yaxis_title="Price",
            legend_title="Price Type",
            template="plotly_white",
            height=self.fig_height,
        )

        return fig

def convert_date(date_str):
    """
    Convert a date string from 'YYYY-MM-DD' format to 'YYYY Month DD' format.

    Args:
        date_str (str): The input date string in the format 'YYYY-MM-DD'.

    Returns:
        str: The date string formatted as 'YYYY Month DD'.
    """
    date = pd.to_datetime(date_str)
    return date.strftime("%Y %B %d")


In [6]:
# initialize the movie scraper object and get all the latest movies
movie_getter = MovieScraper(api_key="1b37fdfd40bfc02f3e734a5a77f5599d")

# get the latest movies that are relased before today's date 
movies = movie_getter(company_name="Warner Bros.", get_released_only=False)

# sort the movies based on the rating 
movies_sorted = sort_movies_by_rating(movies_list=movies)

In [7]:
for i in movies_sorted:
    print(f"Movie: {i["title"]}")

Movie: Furiosa: A Mad Max Saga
Movie: Wonka
Movie: Barbie
Movie: The Color Purple
Movie: Evil Dead Rise
Movie: Black Adam
Movie: Don't Worry Darling
Movie: Blue Beetle
Movie: Aquaman and the Lost Kingdom
Movie: The Flash
Movie: Magic Mike's Last Dance
Movie: Meg 2: The Trench
Movie: BC Project
Movie: Flowervale Street
Movie: Minecraft
Movie: Alto Knights
Movie: Mickey 17
Movie: Joker: Folie à Deux
Movie: Beetlejuice Beetlejuice
Movie: Twisters


**Now, let's iter over each of the movies one by one:** 

In [14]:
import pandas as pd

# Initialize an empty list to store the data
movie_stock_data = []

# Iterate up to the top "N" movies
for item in movies_sorted[:N]:

    # Extract relevant details from each movie item
    is_adult_movie = item["adult"]
    movie_name = item["original_title"]
    overview = item["overview"]
    release_date = item["release_date"]
    rating = item["vote_average"]
    pop = item["popularity"]
    
    # Print movie details
    print("*" * 20, movie_name, "*" * 20)
    print(f"R rated movie: {is_adult_movie}")
    print(f"Overall Rating: {rating}  Popularity: {pop}")
    print(f"Movie Overview: {overview}\n\n")
    
    # Perform sentiment analysis on the movie overview
    print(f"Let's perform sentiment analysis of the movie:")
    print("-" * 50)
    sentiment, polarity, subjectivity = get_sentiment(text=overview)
    print(f"Sentiment: {sentiment}")
    print(f"Polarity: {polarity}")
    print(f"Subjectivity: {subjectivity}")
    print(f"\n\n")
    
    print(f"Release Date: {release_date}")
    
    # Get the time period to check the stock market around the movie release date
    date_before_str, date_after_str = get_dates_around(date_str=release_date)
    print(f"For the movie {movie_name}, we will be looking at the stock market between the two time periods: {date_before_str} -- {date_after_str}")
    
    # Download stock data
    print("Downloading stock data: ")
    scraper = StockScraper(yf_ticker=COMPANY_CODE)
    scraped_data = scraper(start_date=date_before_str, end_date=date_after_str)
    
    # Print stock data before and after the movie release date
    print("Stock data before the release date:")
    print(scraped_data.head())
    print("\nStock data after the release date:")
    print(scraped_data.tail())
    
    # Determine the rise or drop in stock
    stock_before = round(scraped_data.iloc[0]['Close'], 2)
    stock_after = round(scraped_data.iloc[-1]['Close'], 2)
    change = round(stock_after - stock_before, 2)
    change_percent = round((change / stock_before) * 100, 2)
    increase_decrease = "Increase" if change > 0 else "Decrease"
    
    # Append the data to the list
    movie_stock_data.append({
        "Movie Name": movie_name,
        "Release Date": release_date,
        "Close Price Before Release": stock_before,
        "Close Price After Release": stock_after,
        "Increase/Decrease": increase_decrease,
        "Amount": change,
        "Percentage Change": change_percent,
        "Sentiment": sentiment
    })
    
    # Initialize the Visualizer class with the scraped data
    visualizer = Visualizer(
        data=scraped_data,
        producer_name=PRODUCTION_COMPANY,
        movie_name=movie_name,
        start_date=date_before_str,
        end_date=date_after_str,
    )
    
    # Plot Open, High, Low, Close, and Volume data
    all_plot = visualizer.plot_o_h_l_c_v()
    all_plot.show()
    
    # Plot the stock prices with a highlight on the release date
    plot_release_date = visualizer.plot_release_date(release_date=release_date)
    plot_release_date.show()
    
    # Plot overall trends and a regression line after the release date
    reg_plot = visualizer.plot_overall_trends(release_date=release_date)
    reg_plot.show()

# Create a DataFrame from the collected data
df_movie_stock = pd.DataFrame(movie_stock_data)

# Display the DataFrame
print(df_movie_stock)


[*********************100%%**********************]  1 of 1 completed

******************** Furiosa: A Mad Max Saga ********************
R rated movie: False
Overall Rating: 7.637  Popularity: 666.043
Movie Overview: As the world fell, young Furiosa is snatched from the Green Place of Many Mothers and falls into the hands of a great Biker Horde led by the Warlord Dementus. Sweeping through the Wasteland they come across the Citadel presided over by The Immortan Joe. While the two Tyrants war for dominance, Furiosa must survive many trials as she puts together the means to find her way home.


Let's perform sentiment analysis of the movie:
--------------------------------------------------
Sentiment: Positive
Polarity: 0.34
Subjectivity: 0.49000000000000005



Release Date: 2024-05-22
For the movie Furiosa: A Mad Max Saga, we will be looking at the stock market between the two time periods: 2024-05-20 -- 2024-05-24
Downloading stock data: 
Stock data before the release date:
        Date  Open  High   Low  Close    Volume
0 2024-05-20  8.05  8.12  7.99   8




******************** Wonka ********************
R rated movie: False
Overall Rating: 7.165  Popularity: 282.062
Movie Overview: Willy Wonka – chock-full of ideas and determined to change the world one delectable bite at a time – is proof that the best things in life begin with a dream, and if you’re lucky enough to meet Willy Wonka, anything is possible.


Let's perform sentiment analysis of the movie:
--------------------------------------------------
Sentiment: Positive
Polarity: 0.3333333333333333
Subjectivity: 0.6583333333333333



Release Date: 2023-12-06
For the movie Wonka, we will be looking at the stock market between the two time periods: 2023-12-04 -- 2023-12-08
Downloading stock data: 


[*********************100%%**********************]  1 of 1 completed


Stock data before the release date:
        Date   Open   High    Low  Close    Volume
0 2023-12-04  11.24  11.44  11.16  11.39  20425200
1 2023-12-05  11.23  11.31  10.78  10.87  19524300
2 2023-12-06  11.03  11.28  10.89  10.92  15783100
3 2023-12-07  10.95  11.09  10.73  10.82  17100000

Stock data after the release date:
        Date   Open   High    Low  Close    Volume
0 2023-12-04  11.24  11.44  11.16  11.39  20425200
1 2023-12-05  11.23  11.31  10.78  10.87  19524300
2 2023-12-06  11.03  11.28  10.89  10.92  15783100
3 2023-12-07  10.95  11.09  10.73  10.82  17100000


******************** Barbie ********************
R rated movie: False
Overall Rating: 7.051  Popularity: 334.921
Movie Overview: Barbie and Ken are having the time of their lives in the colorful and seemingly perfect world of Barbie Land. However, when they get a chance to go to the real world, they soon discover the joys and perils of living among humans.


Let's perform sentiment analysis of the movie:
--------------------------------------------------
Sentiment: Positive
Polarity: 0.5
Subjectivity: 0.5666666666666667



Release Date: 2023-07-19
For the movie Barbie, we will be looking at the stock market between the two time periods: 2023-07-17 -- 2023-07-21
Downloading stock data: 


[*********************100%%**********************]  1 of 1 completed


Stock data before the release date:
        Date   Open   High    Low  Close    Volume
0 2023-07-17  12.36  12.36  12.14  12.31  17472100
1 2023-07-18  12.33  13.05  12.32  12.77  20815500
2 2023-07-19  12.84  13.36  12.83  13.28  19700900
3 2023-07-20  13.16  13.42  13.04  13.11  15620900

Stock data after the release date:
        Date   Open   High    Low  Close    Volume
0 2023-07-17  12.36  12.36  12.14  12.31  17472100
1 2023-07-18  12.33  13.05  12.32  12.77  20815500
2 2023-07-19  12.84  13.36  12.83  13.28  19700900
3 2023-07-20  13.16  13.42  13.04  13.11  15620900


******************** The Color Purple ********************
R rated movie: False
Overall Rating: 7.025  Popularity: 39.365
Movie Overview: A decades-spanning tale of love and resilience and of one woman's journey to independence. Celie faces many hardships in her life, but ultimately finds extraordinary strength and hope in the unbreakable bonds of sisterhood.


Let's perform sentiment analysis of the movie:
--------------------------------------------------
Sentiment: Positive
Polarity: 0.3333333333333333
Subjectivity: 0.775



Release Date: 2023-12-25
For the movie The Color Purple, we will be looking at the stock market between the two time periods: 2023-12-23 -- 2023-12-27
Downloading stock data: 


[*********************100%%**********************]  1 of 1 completed

Stock data before the release date:
        Date   Open   High    Low  Close    Volume
0 2023-12-26  11.27  11.56  11.17   11.5  17934500

Stock data after the release date:
        Date   Open   High    Low  Close    Volume
0 2023-12-26  11.27  11.56  11.17   11.5  17934500





******************** Evil Dead Rise ********************
R rated movie: False
Overall Rating: 6.957  Popularity: 91.797
Movie Overview: A reunion between two estranged sisters gets cut short by the rise of flesh-possessing demons, thrusting them into a primal battle for survival as they face the most nightmarish version of family imaginable.


Let's perform sentiment analysis of the movie:
--------------------------------------------------
Sentiment: Positive
Polarity: 0.25
Subjectivity: 0.4



Release Date: 2023-04-12
For the movie Evil Dead Rise, we will be looking at the stock market between the two time periods: 2023-04-10 -- 2023-04-14
Downloading stock data: 


[*********************100%%**********************]  1 of 1 completed

Stock data before the release date:
        Date   Open   High    Low  Close    Volume
0 2023-04-10  15.01  15.52  14.96  15.40  15878300
1 2023-04-11  15.50  15.56  14.86  14.93  19676900
2 2023-04-12  15.07  15.14  13.97  14.06  26222900
3 2023-04-13  14.11  14.48  13.73  14.04  36540500

Stock data after the release date:
        Date   Open   High    Low  Close    Volume
0 2023-04-10  15.01  15.52  14.96  15.40  15878300
1 2023-04-11  15.50  15.56  14.86  14.93  19676900
2 2023-04-12  15.07  15.14  13.97  14.06  26222900
3 2023-04-13  14.11  14.48  13.73  14.04  36540500





                Movie Name Release Date  Close Price Before Release  \
0  Furiosa: A Mad Max Saga   2024-05-22                        8.09   
1                    Wonka   2023-12-06                       11.39   
2                   Barbie   2023-07-19                       12.31   
3         The Color Purple   2023-12-25                       11.50   
4           Evil Dead Rise   2023-04-12                       15.40   

   Close Price After Release Increase/Decrease  Amount  Percentage Change  \
0                       7.70          Decrease   -0.39              -4.82   
1                      10.82          Decrease   -0.57              -5.00   
2                      13.11          Increase    0.80               6.50   
3                      11.50          Decrease    0.00               0.00   
4                      14.04          Decrease   -1.36              -8.83   

  Sentiment  
0  Positive  
1  Positive  
2  Positive  
3  Positive  
4  Positive  


In [16]:
df_movie_stock.to_csv("res.csv")

In [None]:
pd.read_csv("")