In [None]:
# Data visualization
# BTech Computer Science Stream, January 2025
# Week 11 & 12 - Time Series Case Study 
# Name: Oswin Vion Dsilva, 240905268, Section CE, Date: 26/03/2024

# Importing libraries

In [None]:
# Imports

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Loading the csv data into dataframes

In [None]:
# Load the NVIDIA stock data from a CSV file; the path is relative, so the file
# must be reachable from the notebook's working directory

var_dfStocks = pd.read_csv("nvidia_stock_data.csv")

# Bare last expression so the notebook displays the loaded frame
var_dfStocks

# Section 1 : Exploratory Analysis

In [None]:
# Check data types and the presence of null values: info() reports each
# column's dtype together with its non-null count

var_dfStocks.info()

In [None]:
# General summary statistics of the data (describe() covers the numeric columns)

var_dfStocks.describe()

In [None]:
# One standalone box plot per price/volume column, to eyeball spread and outliers

var_boxplotColumns = ('Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume')

for col in var_boxplotColumns:
    # A fresh figure per column keeps the plots from being drawn on top of each other
    plt.figure()
    sns.boxplot(data = var_dfStocks[col])
    plt.title(f"Box plot of {col}")
    plt.show()

# Section 2 : Data Pre-processing

In [None]:
# Function to split the date into day, month and year

def date_split(df):
    """Add integer 'Day', 'Month' and 'Year' columns derived from df['Date'].

    Two representations of the 'Date' column are accepted:

    * strings of the form ``day-month-year`` separated by '-', which are split
      on the dash and cast to int64;
    * an already-converted datetime column, which is read through the ``.dt``
      accessor. This makes the function safe to call again after 'Date' has
      been turned into datetimes (the string accessor would raise there).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Date' column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now carrying the three extra int64 columns.
    """
    dates = df['Date']

    if pd.api.types.is_datetime64_any_dtype(dates):
        # .dt components are not guaranteed to be int64, so cast explicitly to
        # keep the column dtype identical to the string branch
        df['Day'] = dates.dt.day.astype('int64')
        df['Month'] = dates.dt.month.astype('int64')
        df['Year'] = dates.dt.year.astype('int64')
    else:
        df[['Day','Month','Year']] = dates.str.split('-', expand = True).astype('int64')

    return df

In [None]:
# Add integer Day / Month / Year columns derived from the Date column

var_dfStocks = date_split(var_dfStocks)

# Display the frame to check the new columns
var_dfStocks

In [None]:
# Convert the Date column to pandas datetimes; dayfirst=True makes the parser
# prefer a day-month-year reading, matching the order used by date_split

var_dfStocks['Date'] = pd.to_datetime(var_dfStocks['Date'], dayfirst = True)

# Re-run info() to confirm the new dtype of Date
var_dfStocks.info()

In [None]:
# Label each day 'Gain' when it closed strictly above its open, otherwise 'Loss'
# (a day that closes exactly at its open is therefore labelled 'Loss')

var_closedAboveOpen = var_dfStocks['Close'].gt(var_dfStocks['Open'])
var_dfStocks['Net'] = np.where(var_closedAboveOpen, 'Gain', 'Loss')

var_dfStocks

# Data visualization

In [None]:
# Function to plot candlestick plot for any given dataframe

def plot_candlestick(df,w1,w2):
    """Draw a candlestick chart of ``df`` on a new 9x9-inch matplotlib figure.

    Parameters
    ----------
    df : DataFrame
        Must provide the columns Date, Open, High, Low, Close and Net. Rows with
        Net == "Gain" are drawn in green and rows with Net == "Loss" in red;
        rows with any other Net value are not drawn.
    w1 : bar width used for the candle bodies (the open-to-close range).
    w2 : bar width used for the wicks (the extensions up to High and down to Low).

    The function does not call plt.show() and returns nothing; the new figure
    is left as the current pyplot figure so the caller can still add a title.
    """
    rising = df[df['Net'] == "Gain"]
    falling = df[df['Net'] == "Loss"]

    # (rows, colour, column at the top of the body, column at the bottom of the body)
    # Assumes 'Gain' rows close above their open and 'Loss' rows do not, so the
    # body tops out at Close for gains and at Open for losses.
    candle_groups = (
        (rising, 'green', 'Close', 'Open'),
        (falling, 'red', 'Open', 'Close'),
    )

    plt.figure(figsize=(9,9))

    for days, colour, body_top, body_bottom in candle_groups:
        ## Body: starts at Open with height Close - Open (negative for losses, so it hangs below Open)
        plt.bar(days['Date'], days['Close'] - days['Open'], w1, bottom = days['Open'], color = colour)

        ## Upper wick: thin bar from the top of the body up to the day's High
        plt.bar(days['Date'], days['High'] - days[body_top], w2, bottom = days[body_top], color = colour)

        ## Lower wick: thin bar from the bottom of the body down to the day's Low
        plt.bar(days['Date'], days['Low'] - days[body_bottom], w2, bottom = days[body_bottom], color = colour)


In [None]:
# Candlestick graph for the entire time period
# Bar widths passed here: 5 for the candle bodies, 0.5 for the wicks

plot_candlestick(var_dfStocks,5,0.5)
plt.show()

# As we can see, there are gaps in the graph.
# This is mainly due to two reasons:
# 1. No data is available for those days
# 2. After-hours/pre-market trading

# A better analysis would be to visualize month-by-month candlestick graphs for each year

In [None]:
# Not used in this cell; kept in case another cell reads it
var_plot_num = 1

# Lookup from month number (1-12) to English month name, used in the plot titles
var_mapNumberToMonth = {
    1:'January',
    2:'February',
    3:'March',
    4:'April',
    5:'May',
    6:'June',
    7:'July',
    8:'August',
    9:'September',
    10:'October',
    11:'November',
    12:'December'}

# One candlestick figure per (year, month) that actually has rows.
# Grouping on both columns yields only the combinations present in the data,
# in ascending (Year, Month) order. Looping over the unique years and the
# unique months independently would also visit year/month pairs with no rows
# (drawing empty figures) and would follow first-appearance order rather than
# calendar order.
for (year, month), var_dfPlotting in var_dfStocks.groupby(['Year', 'Month']):
    plot_candlestick(var_dfPlotting,0.3,0.03)
    # Separate name for the label so the numeric loop variable is not overwritten
    var_monthName = var_mapNumberToMonth[month]
    plt.title(f"Stock prices for each day of {var_monthName} for the year {year}")
    plt.xlabel("Date")
    plt.ylabel("Price")
    plt.xticks(size = 7)
    plt.show()