# **Assignment:** Animate graph using Monthly Data, convert the data into monthly data and then animated the plot, save it as avi and mp4 file or gif

In [1]:
# import libraries
import pandas as pd
import plotly.express as px
import numpy as np

- **Import dataset of World Stock Prices from Jan,2000 to Sep,2023**\
- Dataset has been downloaded from this [kaggle link](https://www.kaggle.com/datasets/nelgiriyewithana/world-stock-prices-daily-updating)

In [2]:

df = pd.read_csv("../01_datasets/world_stock_prices.csv")

In [3]:
df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends',
       'Stock Splits', 'Brand_Name', 'Ticker', 'Industry_Tag', 'Country'],
      dtype='object')

In [4]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Brand_Name,Ticker,Industry_Tag,Country
0,2023-09-20 00:00:00-04:00,4.84,4.91,4.63,4.67,7441900.0,0.0,0.0,peloton,PTON,fitness,usa
1,2023-09-20 00:00:00-04:00,397.049988,397.98999,386.119995,386.299988,3866600.0,0.0,0.0,netflix,NFLX,entertainment,usa
2,2023-09-20 00:00:00-04:00,564.349976,569.219971,562.659973,563.830017,1311500.0,0.0,0.0,costco,COST,retail,usa
3,2023-09-20 00:00:00-04:00,138.550003,139.369995,135.199997,135.289993,46263700.0,0.0,0.0,amazon,AMZN,e-commerce,usa
4,2023-09-20 00:00:00-04:00,179.259995,179.699997,175.399994,175.490005,58436200.0,0.0,0.0,apple,AAPL,technology,usa


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 279753 entries, 0 to 279752
Data columns (total 12 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   Date          279753 non-null  object 
 1   Open          279753 non-null  float64
 2   High          279753 non-null  float64
 3   Low           279753 non-null  float64
 4   Close         279753 non-null  float64
 5   Volume        279753 non-null  float64
 6   Dividends     279753 non-null  float64
 7   Stock Splits  279753 non-null  float64
 8   Brand_Name    279753 non-null  object 
 9   Ticker        279753 non-null  object 
 10  Industry_Tag  279753 non-null  object 
 11  Country       279753 non-null  object 
dtypes: float64(7), object(5)
memory usage: 25.6+ MB


In [6]:
df['Date'].nunique() # Every date have only one entry/row

5966

In [7]:
df['Date'].head()

0    2023-09-20 00:00:00-04:00
1    2023-09-20 00:00:00-04:00
2    2023-09-20 00:00:00-04:00
3    2023-09-20 00:00:00-04:00
4    2023-09-20 00:00:00-04:00
Name: Date, dtype: object

- Remove '00:00:00-' from date values, for this operation we must import numpy library

In [8]:
df['Date'] = df['Date'].str.replace('00:00:00-', '')

In [9]:
df['Date'].head()

0    2023-09-20 04:00
1    2023-09-20 04:00
2    2023-09-20 04:00
3    2023-09-20 04:00
4    2023-09-20 04:00
Name: Date, dtype: object

In [10]:
df['Date'].tail()

279748    2023-08-29 04:00
279749    2023-08-30 04:00
279750    2023-08-31 04:00
279751    2023-09-01 04:00
279752    2023-09-05 04:00
Name: Date, dtype: object

In [11]:
print(df['Date'].min())
print(df['Date'].max())

2000-01-03 05:00
2023-09-20 04:00


In [12]:
df['Country'].unique()

array(['usa', 'japan', 'germany', 'switzerland', 'canada', 'netherlands',
       'france'], dtype=object)

In [13]:
#columns are 'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends',
#       'Stock Splits', 'Brand_Name', 'Ticker', 'Industry_Tag', 'Country'

# group coffee based on each month in date and take average of new_cases
# Convert the 'date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Create a new column 'year_month' to represent the year and month
df['year_month'] = df['Date'].dt.to_period('M')

# Now group by 'year_month' and 'location', and sum the 'new_cases' & total_cases for each group
monthly = df.groupby(['year_month', 'Country']).agg({'Volume': 'sum'}).reset_index()

# Convert 'year_month' back to a string format for readability
monthly['year_month'] = monthly['year_month'].astype(str)

# This will give you a new DataFrame with the sum of new cases for each month and location
monthly.head()

Unnamed: 0,year_month,Country,Volume
0,2000-01,japan,8388700.0
1,2000-01,netherlands,48002560.0
2,2000-01,switzerland,4511200.0
3,2000-01,usa,23615150000.0
4,2000-02,japan,14259500.0


In [14]:
monthly['Volume'].min()

5000.0

In [15]:
monthly['Volume'].max()

76494381784.0

- ### Display row on specific column's value. Here, maximum volume's row in dataframe

In [16]:
# Filter the DataFrame based on the specified value
specific_row = df[df['Volume'] == df['Volume'].max()]

# Display the filtered DataFrame
print(specific_row)

                     Date     Open      High       Low     Close  \
19219 2000-09-29 04:00:00  0.42724  0.439555  0.384611  0.390294   

             Volume  Dividends  Stock Splits Brand_Name Ticker Industry_Tag  \
19219  7.421641e+09        0.0           0.0      apple   AAPL   technology   

      Country year_month  
19219     usa    2000-09  


In [18]:
fig = px.choropleth(monthly, locations='Country', locationmode='country names', color = 'Volume',
                    title='World Stock Prices in USD (January 2000 to Sep 2023)',
                    range_color=[0, monthly['Volume'].quantile(1)],
                    animation_frame= 'year_month', color_continuous_scale='viridis',
                    labels={'year_month':'Year-Month', 'Volume':'Volume in USD'}
                    )
# increase the size of the map and center the title text
fig.update_layout(height=600, width=800, title_x=0.5)

In [None]:
# saving animated gif
import plotly.express as px
import pandas as pd
import numpy as np
import io
import PIL
fig = px.choropleth(monthly, locations='Country', locationmode='country names', color = 'Volume',
                    title='World Stock Prices in USD (January 2000 to Sep 2023)', 
                    range_color=[0, monthly['Volume'].quantile(1)],
                    animation_frame= 'year_month', color_continuous_scale='viridis',
                    labels={'year_month':'Year-Month', 'Volume':'Volume in USD'}
                    )
# increase the size of the map and center the title text
fig.update_layout(height=600, width=800, title_x=0.5)

# generate images for each step in animation
frames = []
for s, fr in enumerate(fig.frames):
    # set main traces to appropriate traces within plotly frame
    fig.update(data=fr.data)
    # move slider to correct place
    fig.layout.sliders[0].update(active=s)
    # generate image of current state
    frames.append(PIL.Image.open(io.BytesIO(fig.to_image(format="png", scale=3))))
    
# create animated GIF
frames[0].save(
        "./assign_plots/world_stock_prices.gif",
        save_all=True,
        append_images=frames[1:],
        optimize=True,
        duration=500, # milliseconds per frame
        loop=0, # infinite loop
        dither=None  # Turn off dithering
    )