In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for root_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(root_dir, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the Tata Motors price history from the Kaggle input dataset.
tata_csv_path = "/kaggle/input/tata-motors-stock-history/Tata_Motors_Ltd._historical_data.csv"
tata = pd.read_csv(tata_csv_path)
# Leaving the frame as the last expression makes the notebook render it.
tata

In [None]:
# Check for missing values.
# missingno is imported here under the alias `msno`; its matrix() plot is called
# on the full `tata` frame to visualise where values are missing.
import missingno as msno
msno.matrix(tata)

# **EDA**

In [None]:
# Display the first few rows of the dataset.
# Left as the cell's last expression so the notebook renders the frame as a
# table instead of printing its plain-text form.
tata.head()



In [None]:
# Basic dataset information.
# DataFrame.info() writes its summary itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
tata.info()



In [None]:
# Summary statistics for numerical columns.
# Left as the cell's last expression so the notebook renders the result as a
# table instead of printing its plain-text form.
tata.describe()


# **Plot the time series of 'Close' prices**

In [None]:
import matplotlib.pyplot as plt

# Plot the time series of 'Close' prices.
# 'Date' is still the raw column read from the CSV at this point (it is only
# converted to datetime in a later cell), so parse it for the x-axis here.
# Plotting unparsed date strings makes matplotlib treat every date as its own
# category instead of a position on a continuous time axis.
# `tata` itself is left untouched so the later conversion cell still works.
close_dates = pd.to_datetime(tata['Date'])

plt.figure(figsize=(12, 6))
plt.plot(close_dates, tata['Close'], label='Close Price', color='b')
plt.title('TATA Motors Close Price Over Time')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.grid(True)
plt.show()



# **Rolling mean and standard deviation of 'Close' prices**

In [None]:
# Calculate and visualize rolling mean and standard deviation of 'Close' prices.
rolling_window = 50  # Adjust the window size as needed

# Build the rolling window once and derive both statistics from it.
close_rolling = tata['Close'].rolling(window=rolling_window)
tata['RollingMean'] = close_rolling.mean()
tata['RollingStd'] = close_rolling.std()

# 'Date' is still the raw column read from the CSV at this point (it is only
# converted to datetime in a later cell), so parse it for the x-axis here.
# Plotting unparsed date strings makes matplotlib treat every date as its own
# category instead of a position on a continuous time axis.
# The 'Date' column of `tata` is not modified.
rolling_dates = pd.to_datetime(tata['Date'])

plt.figure(figsize=(12, 6))
plt.plot(rolling_dates, tata['Close'], label='Close Price', color='b', alpha=0.7)
plt.plot(rolling_dates, tata['RollingMean'], label=f'{rolling_window}-Day Rolling Mean', color='r')
plt.plot(rolling_dates, tata['RollingStd'], label=f'{rolling_window}-Day Rolling Std Dev', color='g')
plt.title(f'TATA Motors Close Price with {rolling_window}-Day Rolling Statistics')
plt.xlabel('Date')
plt.ylabel('Close Price / Rolling Statistics')
plt.legend()
plt.grid(True)
plt.show()

# **Monthly Mean Closing Price Time Series**

In [None]:
# Convert the 'Date' column to datetime and make it the index.
# Guarded so this cell can be re-run: once 'Date' has become the index it is no
# longer a column, and repeating the conversion would raise a KeyError.
if 'Date' in tata.columns:
    tata['Date'] = pd.to_datetime(tata['Date'])
    tata.set_index('Date', inplace=True)

# Resample data on a monthly basis and calculate the mean closing price.
# 'M' is the month-end frequency alias; recent pandas releases deprecate it in
# favour of 'ME', so switch the alias if a FutureWarning appears here.
monthly_mean_close = tata['Close'].resample('M').mean()

# Plotting the monthly mean closing price
plt.figure(figsize=(12, 6))
plt.plot(monthly_mean_close.index, monthly_mean_close.values, label='Monthly Mean Closing Price', color='green')
plt.title('Monthly Mean Closing Price of Tata Motors Ltd.')
plt.xlabel('Date')
plt.ylabel('Price (INR)')
plt.xticks(rotation=45)
plt.legend()
plt.show()


# **Moving Averages**

In [None]:
# Add the 30- and 60-row rolling means of the close price as new columns.
for window in (30, 60):
    tata[f'{window}_Day_MA'] = tata['Close'].rolling(window=window).mean()

# Draw the close price together with both moving averages on one figure.
fig, ax = plt.subplots(figsize=(12, 6))
series_to_plot = [
    ('Close', 'Tata Motors Ltd. Close Price', 'blue'),
    ('30_Day_MA', '30-Day Avg', 'orange'),
    ('60_Day_MA', '60-Day Avg', 'green'),
]
for column, legend_label, line_color in series_to_plot:
    ax.plot(tata.index, tata[column], label=legend_label, color=line_color)
ax.set_title('Tata Motors Ltd. Closing Price and Moving Averages')
ax.set_xlabel('Date')
ax.set_ylabel('Price (INR)')
plt.xticks(rotation=45)
ax.legend()
plt.show()


# **Returns Calculation**

In [None]:
# Daily return: fractional change of the close price relative to the previous row.
close_prices = tata['Close']
tata['Daily_Return'] = close_prices.pct_change()

# Line chart of the daily returns against the frame's index.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(tata.index, tata['Daily_Return'], label='Daily Returns', color='purple')
ax.set_title('Tata Motors Ltd. Daily Returns')
ax.set_xlabel('Date')
ax.set_ylabel('Return')
plt.xticks(rotation=45)
ax.legend()
plt.show()


# **Volatility Analysis**

In [None]:
# Volatility is measured as the 30-row rolling standard deviation of the
# daily returns computed in the previous cell.
daily_returns = tata['Daily_Return']
tata['Volatility'] = daily_returns.rolling(window=30).std()

# Line chart of the rolling volatility against the frame's index.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(tata.index, tata['Volatility'], label='Volatility', color='red')
ax.set_title('Tata Motors Ltd. Volatility Analysis')
ax.set_xlabel('Date')
ax.set_ylabel('Volatility')
plt.xticks(rotation=45)
ax.legend()
plt.show()


In [None]:
pip install yfinance

# **Correlation with the Nifty 50 Index**

In [None]:
import yfinance as yf

# Example: Correlation with Nifty 50
# Fetch the '^NSEI' (Nifty 50) history for the fixed date range below; this call needs network access.
nifty = yf.download('^NSEI', start='2000-01-01', end='2023-09-02')  # Download Nifty 50 data
# Attach the Tata Motors close as an extra column, aligned on the index.
# This column is not read again anywhere in this cell.
nifty['TATA_MOTORS'] = tata['Close']  # Add Tata Motors data
# Correlation between the two close-price series (price levels, not returns).
# NOTE(review): presumably both indexes are comparable dates (same timezone
# handling) and nifty['Close'] is a single column -- confirm against the
# installed yfinance version, otherwise the result may be NaN or the call may fail.
correlation = tata['Close'].corr(nifty['Close'])  # Calculate correlation

print(f"Correlation between Tata Motors and Nifty 50: {correlation:.2f}")


In [None]:
# Preview the first five rows of the frame, including the columns added by the cells above.
tata.head(n=5)

# **Use case: fit a scikit-learn (sklearn) linear regression model to predict the opening and closing prices of Tata Motors stock for the next 10 days, and calculate the predicted change in price.**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt

# Reload the CSV so this cell starts from a fresh frame, then index it by date.
tata=pd.read_csv("/kaggle/input/tata-motors-stock-history/Tata_Motors_Ltd._historical_data.csv")

tata['Date'] = pd.to_datetime(tata['Date'])
tata.set_index('Date', inplace=True)

# Prepare the features and target variables.
# The only feature is the row position (0, 1, 2, ...), so each model below is a
# straight-line trend over the rows of the dataset.
X = np.arange(len(tata)).reshape(-1, 1)  # Use the index as a feature
y_open = tata['Open']
y_close = tata['Close']

# Split the data into training and testing sets.
# shuffle=False keeps row order, so the last 20% of rows form the hold-out set.
X_train, X_test, y_open_train, y_open_test, y_close_train, y_close_test = train_test_split(
    X, y_open, y_close, test_size=0.2, shuffle=False
)

# Train regression models on the training rows
lr_open = LinearRegression()
lr_open.fit(X_train, y_open_train)

lr_close = LinearRegression()
lr_close.fit(X_train, y_close_train)

# Score the models on the hold-out rows so the split is actually used and the
# reader can judge how far to trust the forecast below.
mae_open = np.mean(np.abs(lr_open.predict(X_test) - y_open_test))
mae_close = np.mean(np.abs(lr_close.predict(X_test) - y_close_test))
print(f"Hold-out mean absolute error (opening price): {mae_open:.2f} INR")
print(f"Hold-out mean absolute error (closing price): {mae_close:.2f} INR")

# Refit on every available row before forecasting, as the multi-model cell
# later in this notebook does; otherwise the forecast would come from a model
# that never saw the most recent 20% of the data.
lr_open.fit(X, y_open)
lr_close.fit(X, y_close)

# Make predictions for the next 10 days (the 10 row positions after the last row)
next_10_days = np.arange(len(tata), len(tata) + 10).reshape(-1, 1)
predicted_open = lr_open.predict(next_10_days)
predicted_close = lr_close.predict(next_10_days)

# Calculate price differences: forecast for the 10th future row minus the last observed price
price_diff_open = predicted_open[-1] - tata['Open'].iloc[-1]
price_diff_close = predicted_close[-1] - tata['Close'].iloc[-1]

# Suggest whether buying is sensible (a positive predicted change maps to "Buy")
suggestion_open = "Buy" if price_diff_open > 0 else "Sell"
suggestion_close = "Buy" if price_diff_close > 0 else "Sell"

# Plot the predictions: actual history and the 10 forecast points, one panel per price
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(X, y_open, label="Actual Opening Price")
plt.plot(next_10_days, predicted_open, label="Predicted Opening Price")
plt.title("Opening Price Prediction")
plt.xlabel("Days")
plt.ylabel("Price")
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(X, y_close, label="Actual Closing Price")
plt.plot(next_10_days, predicted_close, label="Predicted Closing Price")
plt.title("Closing Price Prediction")
plt.xlabel("Days")
plt.ylabel("Price")
plt.legend()

plt.tight_layout()
plt.show()

print(f"Predicted opening price change for the next 10 days: {price_diff_open:.2f} INR ({suggestion_open})")
print(f"Predicted closing price change for the next 10 days: {price_diff_close:.2f} INR ({suggestion_close})")


# **Compare several scikit-learn (sklearn) regression models for predicting the opening and closing prices of Tata Motors stock, and calculate the predicted price difference over the next 10 days.**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
import matplotlib.pyplot as plt

# Fixed seed passed to every estimator that accepts one, so the printed numbers
# and the plots are the same on every run of this cell.
RANDOM_STATE = 42

# Load the dataset and index it by date
tata=pd.read_csv("/kaggle/input/tata-motors-stock-history/Tata_Motors_Ltd._historical_data.csv")
tata['Date'] = pd.to_datetime(tata['Date'])
tata.set_index('Date', inplace=True)

# Prepare the features and target variables.
# The only feature is the row position (0, 1, 2, ...).
X = np.arange(len(tata)).reshape(-1, 1)  # Use the index as a feature
y_open = tata['Open']
y_close = tata['Close']

# Split the data into training and testing sets.
# shuffle=False keeps row order, so the last 20% of rows form the hold-out set.
X_train, X_test, y_open_train, y_open_test, y_close_train, y_close_test = train_test_split(
    X, y_open, y_close, test_size=0.2, shuffle=False
)

# Initialize regression models
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree Regression": DecisionTreeRegressor(random_state=RANDOM_STATE),
    "Random Forest Regression": RandomForestRegressor(random_state=RANDOM_STATE),
    "Support Vector Regression": SVR(),
    "Gradient Boosting Regression": GradientBoostingRegressor(random_state=RANDOM_STATE),
}

predictions_open = {}
predictions_close = {}

# Train and predict for each model.
# Each estimator instance is fitted twice (opening, then closing prices); the
# predictions are stored before the refit, so nothing is lost by reusing it.
for model_name, model in models.items():
    # Fit the model for opening prices
    model.fit(X_train, y_open_train)
    predicted_open = model.predict(X_test)
    predictions_open[model_name] = predicted_open

    # Fit the model for closing prices
    model.fit(X_train, y_close_train)
    predicted_close = model.predict(X_test)
    predictions_close[model_name] = predicted_close

# Predict for the next 10 days (the 10 row positions after the last row).
# NOTE(review): these positions lie beyond every value seen during fitting --
# check that each estimator gives a meaningful extrapolation for them.
next_10_days = np.arange(len(tata), len(tata) + 10).reshape(-1, 1)

predicted_open_10_days = {}
predicted_close_10_days = {}

# For the forecast, every model is refitted on the full dataset rather than on
# the training rows only.
for model_name, model in models.items():
    # Predict opening prices for the next 10 days
    model.fit(X, y_open)
    predicted_open_10 = model.predict(next_10_days)
    predicted_open_10_days[model_name] = predicted_open_10

    # Predict closing prices for the next 10 days
    model.fit(X, y_close)
    predicted_close_10 = model.predict(next_10_days)
    predicted_close_10_days[model_name] = predicted_close_10

# Calculate price differences against the last day of testing.
# Because the split is not shuffled, that is also the last row of the dataset.
actual_open_last_day = y_open_test.iloc[-1]
actual_close_last_day = y_close_test.iloc[-1]

price_diff_open = {model_name: predicted_open_10_days[model_name][-1] - actual_open_last_day for model_name in models}
price_diff_close = {model_name: predicted_close_10_days[model_name][-1] - actual_close_last_day for model_name in models}

# Suggest whether buying is sensible based on the predicted price changes
# (a positive predicted change maps to "Buy", anything else to "Sell")
suggestions_open = {model_name: "Buy" if price_diff > 0 else "Sell" for model_name, price_diff in price_diff_open.items()}
suggestions_close = {model_name: "Buy" if price_diff > 0 else "Sell" for model_name, price_diff in price_diff_close.items()}

# Print the price differences and suggestions
print("Predicted opening price changes for the next 10 days:")
for model_name, price_diff in price_diff_open.items():
    print(f"{model_name}: {price_diff:.2f} INR ({suggestions_open[model_name]})")

print("\nPredicted closing price changes for the next 10 days:")
for model_name, price_diff in price_diff_close.items():
    print(f"{model_name}: {price_diff:.2f} INR ({suggestions_close[model_name]})")

# Plot the hold-out predictions for opening and closing prices, one panel each
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(X_test, y_open_test, label="Actual Opening Price", color='blue')
for model_name, predicted_open in predictions_open.items():
    plt.plot(X_test, predicted_open, label=f"Predicted Open ({model_name})")
plt.title("Opening Price Predictions")
plt.xlabel("Days")
plt.ylabel("Price")
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(X_test, y_close_test, label="Actual Closing Price", color='blue')
for model_name, predicted_close in predictions_close.items():
    plt.plot(X_test, predicted_close, label=f"Predicted Close ({model_name})")
plt.title("Closing Price Predictions")
plt.xlabel("Days")
plt.ylabel("Price")
plt.legend()

plt.tight_layout()
plt.show()
