**Import Libraries and Loading the Dataset**

In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from ipywidgets import interactive, Dropdown
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from ipywidgets import widgets
from IPython.display import display
import numpy as np
dataset = pd.read_csv('/kaggle/input/us-stock-market-2020-to-2024/US Stock Market Dataset.csv')

**Convert Price Columns from Strings to Floats**

In [2]:
price_columns = ['Bitcoin_Price', 'Platinum_Price', 'Ethereum_Price', 'S&P_500_Price', 
                 'Nasdaq_100_Price', 'Berkshire_Price', 'Gold_Price']
for col in price_columns:
    dataset[col] = dataset[col].str.replace(',', '').astype(float)

**Handle Missing Values**

In [3]:
dataset.fillna(method='ffill', inplace=True)
dataset.fillna(method='bfill', inplace=True)

  dataset.fillna(method='ffill', inplace=True)
  dataset.fillna(method='bfill', inplace=True)


**Check the Final State of the Dataset**

In [4]:
print(dataset.head())
print(dataset.isnull().sum())

   Unnamed: 0        Date  Natural_Gas_Price  Natural_Gas_Vol.  \
0           0    2/2/2024              2.079          161340.0   
1           1    1/2/2024              2.050          161340.0   
2           2  31-01-2024              2.100          142860.0   
3           3  30-01-2024              2.077          139750.0   
4           4  29-01-2024              2.490            3590.0   

   Crude_oil_Price  Crude_oil_Vol.  Copper_Price  Copper_Vol.  Bitcoin_Price  \
0            72.28        577940.0        3.8215      38000.0        43194.7   
1            73.82        577940.0        3.8535      38000.0        43081.4   
2            75.85        344490.0        3.9060      38000.0        42580.5   
3            77.82        347240.0        3.9110      38000.0        42946.2   
4            76.78        331930.0        3.8790      38000.0        43299.8   

   Bitcoin_Vol.  ...  Berkshire_Price  Berkshire_Vol.  Netflix_Price  \
0         42650  ...         589498.0           10

**Interactive Time Series Plot for Stock Prices**

In [5]:
def interactive_line_plot(y_axis):
    fig = go.Figure()

    # Update the plot
    fig.add_trace(go.Scatter(x=dataset['Date'], y=dataset[y_axis], mode='lines', name=y_axis))

    # Update layout
    fig.update_layout(title=f'{y_axis} Over Time', xaxis_title='Date', yaxis_title=y_axis)

    fig.show()

exclude_columns = ['Date']
y_axis_options = [col for col in dataset.columns if col not in exclude_columns]
interactive_plot = interactive(interactive_line_plot, y_axis=y_axis_options)
interactive_plot

interactive(children=(Dropdown(description='y_axis', options=('Unnamed: 0', 'Natural_Gas_Price', 'Natural_Gas_…

**Interactive Scatter Plot to Compare Two Stocks**

In [6]:
def interactive_scatter_plot(x_stock, y_stock):
    fig = px.scatter(dataset, x=x_stock, y=y_stock, title=f'{x_stock} vs. {y_stock} Stock Prices',
                     trendline='ols')  # Add a trendline for linear regression fit
    fig.update_xaxes(title_text=f'{x_stock} Price in USD')
    fig.update_yaxes(title_text=f'{y_stock} Price in USD')
    
    fig.show()
x_dropdown = Dropdown(options=[col for col in dataset.columns if '_Price' in col], description='X-axis')
y_dropdown = Dropdown(options=[col for col in dataset.columns if '_Price' in col], description='Y-axis')
interactive_widget = interactive(interactive_scatter_plot, x_stock=x_dropdown, y_stock=y_dropdown)
interactive_widget

interactive(children=(Dropdown(description='X-axis', options=('Natural_Gas_Price', 'Crude_oil_Price', 'Copper_…

**Interactive Bar Chart for Cryptocurrency Volumes**

In [7]:
fig = px.bar(dataset, x='Date', y='Bitcoin_Vol.', title='Bitcoin Trading Volume Over Time')
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Volume')
fig.show()


In [8]:
dataset['Date'] = pd.to_datetime(dataset['Date'], errors='coerce', dayfirst=True)
dataset.sort_values('Date', inplace=True)
dataset['Previous_Apple_Price'] = dataset['Apple_Price'].shift(1)
dataset.dropna(inplace=True)  # Drop the first row where the previous price would be NaN
X = dataset[['Previous_Apple_Price']]
y = dataset['Apple_Price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [10]:
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 14.804129396475934


**Visual for Predictive Modeling of Price**

In [11]:
# Ensure the 'Date' column is in datetime format
dataset['Date'] = pd.to_datetime(dataset['Date'], errors='coerce')

# Drop rows where 'Date' could not be converted to datetime (if any)
dataset = dataset.dropna(subset=['Date'])

# Sort the dataset by 'Date' to ensure chronological order
dataset.sort_values('Date', inplace=True)

# Calculate 'Day_Count' based on the 'Date' column
dataset['Day_Count'] = (dataset['Date'] - dataset['Date'].min()).dt.days

# Now, check for NaN values across the dataset
print(dataset.isnull().sum())

# Consider dropping NaN values specifically in columns of interest if necessary
dataset_clean = dataset.dropna(subset=['Day_Count', 'Apple_Price'])

# After cleaning, confirm that 'Day_Count' and 'Apple_Price' are available
print(dataset_clean[['Day_Count', 'Apple_Price']].isnull().sum())


Unnamed: 0              0
Date                    0
Natural_Gas_Price       0
Natural_Gas_Vol.        0
Crude_oil_Price         0
Crude_oil_Vol.          0
Copper_Price            0
Copper_Vol.             0
Bitcoin_Price           0
Bitcoin_Vol.            0
Platinum_Price          0
Platinum_Vol.           0
Ethereum_Price          0
Ethereum_Vol.           0
S&P_500_Price           0
Nasdaq_100_Price        0
Nasdaq_100_Vol.         0
Apple_Price             0
Apple_Vol.              0
Tesla_Price             0
Tesla_Vol.              0
Microsoft_Price         0
Microsoft_Vol.          0
Silver_Price            0
Silver_Vol.             0
Google_Price            0
Google_Vol.             0
Nvidia_Price            0
Nvidia_Vol.             0
Berkshire_Price         0
Berkshire_Vol.          0
Netflix_Price           0
Netflix_Vol.            0
Amazon_Price            0
Amazon_Vol.             0
Meta_Price              0
Meta_Vol.               0
Gold_Price              0
Gold_Vol.   

In [12]:
model = LinearRegression()
model.fit(dataset_clean[['Day_Count']], dataset_clean['Apple_Price'])

# Predict on the clean dataset
dataset_clean['Predicted_Apple_Price'] = model.predict(dataset_clean[['Day_Count']])


In [13]:
import plotly.graph_objects as go

fig = go.Figure()

# Add traces for actual and predicted Apple Prices
fig.add_trace(go.Scatter(x=dataset_clean['Date'], y=dataset_clean['Apple_Price'], mode='lines', name='Actual Apple Price'))
fig.add_trace(go.Scatter(x=dataset_clean['Date'], y=dataset_clean['Predicted_Apple_Price'], mode='lines', name='Estimated Apple Price'))

# Update layout for range slider
fig.update_layout(
    title='Apple Stock Price Prediction with Range Slider',
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=1, label='1m', step='month', stepmode='backward'),
                dict(count=6, label='6m', step='month', stepmode='backward'),
                dict(step='all')
            ])
        ),
        rangeslider=dict(visible=True),
        type='date'
    ),
    yaxis_title='Price in USD'
)

fig.show()
