# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# Load the dataset
df = pd.read_csv('/input/stock-exchange-data/indexProcessed.csv')
print("First few rows of the dataset:")
print(df.head())

# Parse the Date column; values that cannot be parsed become NaT so they can
# be reported and then dropped instead of aborting the whole run.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
invalid_dates = df[df['Date'].isna()]
if not invalid_dates.empty:
    print("Rows with invalid dates:")
    print(invalid_dates)
df.dropna(subset=['Date'], inplace=True)
df.set_index('Date', inplace=True)
print("Cleaned data:")
print(df.head())

# Drop every row that still has a missing value in any column.
df.dropna(inplace=True)

features = ['Open', 'High', 'Low', 'Volume']
target = 'Close'
X = df[features]
y = df[target]

# Split the data into training and testing sets (80% train, 20% test).
# The split happens BEFORE scaling so the scaler's mean/variance are learned
# from training rows only; fitting it on the full dataset would leak test-set
# statistics into the features the model is trained on.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full scaled matrix, kept under its original name for any later code that
# reads it; it is not used for fitting or evaluation below.
X_scaled = scaler.transform(X)

model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f'Root Mean Squared Error (RMSE): {rmse}')

# train_test_split shuffles rows, so the test set is not in date order.
# Sort by date before drawing, otherwise the line plot zig-zags back and
# forth across the time axis. A stable sort keeps rows sharing a date in
# their existing relative order.
plot_order = np.argsort(y_test.index.values, kind='stable')
plot_dates = y_test.index[plot_order]

plt.figure(figsize=(10, 6))
plt.plot(plot_dates, y_test.to_numpy()[plot_order], color='blue', label='Actual Prices')
plt.plot(plot_dates, y_pred[plot_order], color='red', label='Predicted Prices')
plt.title('Stock Price Prediction: Actual vs Predicted')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()

# Score the last row of the cleaned data. The slice is passed as a DataFrame
# (not .values) so its column names match the ones the scaler was fitted
# with, which avoids scikit-learn's feature-name mismatch warning.
# NOTE(review): the model maps a row's Open/High/Low/Volume to that same
# row's Close, so this value is the model's estimate for the last row rather
# than a forecast built from next-day inputs -- confirm the intended meaning
# of "next day" in the message below.
future_data = df[features].iloc[-1:]
scaled_future_data = scaler.transform(future_data)
predicted_price = model.predict(scaled_future_data)
print(f"Predicted Close price for the next day: {predicted_price[0]}")
