In [20]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


In [4]:
# Load the Coca-Cola stock data
stock_data = pd.read_csv('https://raw.githubusercontent.com/Tirth678/Data-Analysis-Projects/refs/heads/main/Coco%20Cola/Data/Coca-Cola_stock_info.csv')

# Check for missing values (per-column NaN counts) and actually show them;
# the counts are taken on the raw frame, before any cleaning.
missing_values = stock_data.isnull().sum()
print(missing_values)

# Remove rows that are entirely NaN *before* forward-filling. Doing it the
# other way round makes the drop a no-op, because a blank row is filled with
# the values of the row above it and is then no longer "all NaN".
# Then forward-fill the remaining gaps. `DataFrame.ffill()` replaces the
# deprecated `fillna(method='ffill')` spelling.
stock_data = stock_data.dropna(how='all').ffill()

# Display the cleaned data
print(stock_data.head())


In [13]:
# First make sure yfinance is installed:
# !pip install yfinance

# Only a failed import means the package is missing, so the install hint is
# attached to ImportError instead of being printed for every possible failure.
try:
    import yfinance as yf
except ImportError as e:
    raise ImportError(
        "Please make sure yfinance is installed using: pip install yfinance"
    ) from e

# Fetch Coca-Cola stock data.
# Errors are deliberately NOT swallowed here: if the download fails, `data`
# would be left undefined (or stale from an earlier run) and every later cell
# would silently operate on the wrong state.
ticker = 'KO'
data = yf.download(ticker, start='2015-01-01', end='2023-12-31')

# NOTE(review): yfinance appears to report some failures by returning an empty
# frame rather than raising -- confirm against the installed version.
if data.empty:
    raise RuntimeError(f"No price data returned for ticker {ticker!r}")

# Some yfinance versions return two-level (field, ticker) columns even for a
# single ticker. Keep only the field level so that data['Close'], groupby and
# the CSV header used later in the notebook see plain column names.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

# Turn the index into a regular column (plotted later as data['Date'])
data = data.reset_index()
print(data.head())


In [14]:
# Check for missing values (NaN count per column)
print(data.isnull().sum())

# Handle missing values:
#   1. forward-fill, so each gap takes the last preceding value;
#      `ffill()` replaces the deprecated `fillna(method='ffill')` form.
#   2. anything still missing (no earlier value to copy from) becomes 0.
# NOTE(review): a zero-filled price would make the later pct_change() produce
# inf and drag the moving averages down -- consider dropping such rows instead
# if the printed counts above are ever non-zero.
data = data.ffill().fillna(0)

# Check data types
print(data.dtypes)


In [15]:
# Derive trailing-window features from the closing price.
close_price = data['Close']

# Simple moving averages: (column name, window length in rows)
for column_name, window_size in (('MA_20', 20), ('MA_50', 50)):
    data[column_name] = close_price.rolling(window=window_size).mean()

# Row-over-row percentage change of the close
daily_return = close_price.pct_change()
data['Daily_Return'] = daily_return

# Volatility: rolling 20-row standard deviation of the daily returns
data['Volatility'] = daily_return.rolling(window=20).std()

# The rolling / pct_change columns begin with NaN until a full window is
# available; discard those rows in place.
data.dropna(inplace=True)


In [16]:
# Summary statistics for the engineered frame
summary_stats = data.describe()
print(summary_stats)


In [17]:
# Closing price with its 20- and 50-row moving averages overlaid.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(data['Date'], data['Close'], label='Close Price', color='blue')

# Both moving averages share the dashed style; only column, label and colour differ
for ma_column, ma_label, ma_color in (
    ('MA_20', '20-Day MA', 'orange'),
    ('MA_50', '50-Day MA', 'green'),
):
    ax.plot(data['Date'], data[ma_column], label=ma_label, linestyle='--', color=ma_color)

ax.set_title('Coca-Cola Stock Prices with Moving Averages')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
plt.show()


In [18]:
# Correlation heatmap of the numeric columns.
plt.figure(figsize=(10, 8))

# Restrict to numeric columns before correlating: `data` also carries the
# datetime 'Date' column (and a period 'YearMonth' column once the
# monthly-volume cell has run), and recent pandas versions no longer drop
# non-numeric columns from DataFrame.corr() automatically -- they raise.
numeric_data = data.select_dtypes(include='number')
sns.heatmap(numeric_data.corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()


In [30]:
# Make sure 'Date' is a datetime column so the .dt accessor is available
data['Date'] = pd.to_datetime(data['Date'])

# Tag every row with its calendar month (monthly Period)
data['YearMonth'] = data['Date'].dt.to_period('M')

# Sum the traded volume per month, then turn the month key back into a
# timestamp so it can be used directly on a plot axis.
monthly_volume = (
    data.groupby('YearMonth')['Volume']
    .sum()
    .reset_index()
    .assign(YearMonth=lambda frame: frame['YearMonth'].dt.to_timestamp())
)


In [26]:
# Rolling volatility of daily returns across the sample period.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(data['Date'], data['Volatility'], color='red', label='Volatility')
ax.set_title('Volatility Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Volatility')
ax.legend()
plt.show()


In [27]:
# Persist the engineered frame to disk; the row index is not written out.
data.to_csv(path_or_buf='cleaned_coca_cola_stock.csv', index=False)


In [31]:
# Line chart of the total traded volume per calendar month.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(
    monthly_volume['YearMonth'],
    monthly_volume['Volume'],
    marker='o',
    linestyle='-',
    color='blue',
)
ax.set_title('Monthly Trading Volume for Coca-Cola Stock', fontsize=16)
ax.set_xlabel('Month', fontsize=12)
ax.set_ylabel('Total Trading Volume', fontsize=12)
ax.grid(True, linestyle='--', alpha=0.6)

# Slant the date labels so neighbouring ticks do not overlap
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [32]:
# Locate the rows of monthly_volume holding the largest and smallest totals
volume_by_month = monthly_volume['Volume']
peak_label = volume_by_month.idxmax()
trough_label = volume_by_month.idxmin()

highest_volume = monthly_volume.loc[peak_label]
lowest_volume = monthly_volume.loc[trough_label]

# Report each extreme together with the month it occurred in
print(f"Highest Trading Volume: {highest_volume['Volume']} in {highest_volume['YearMonth']}")
print(f"Lowest Trading Volume: {lowest_volume['Volume']} in {lowest_volume['YearMonth']}")
