In [None]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

data = yf.download('INFY.NS', start='2025-01-01', end='2025-02-06')
df = pd.DataFrame(data)

# Dropping Volume column
df.drop('Volume', inplace=True, axis=1)

# Creating previous day features
df['Prev Close'] = df['Close'].shift(1)
df['Prev High'] = df['High'].shift(1)
df['Prev Low'] = df['Low'].shift(1)
df['Prev Open'] = df['Open'].shift(1)

df.dropna(inplace=True)

# Defining features and target
X = df[['Prev Close', 'Prev Low', 'Prev Open', 'Prev High']]
y = df[['Open', 'High', 'Low', 'Close']]

# Splitting into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

# Training Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predicting next day prices
latest_data = X.iloc[-1].values.reshape(1, -1)
predicted_values = model.predict(latest_data)
pred_open, pred_high, pred_low, pred_close = predicted_values[0]

print(f"📊 **Predicted Prices for Next Day:**")
print(f"📌 Open:  {pred_open:.2f}")
print(f"📌 High:  {pred_high:.2f}")
print(f"📌 Low:   {pred_low:.2f}")
print(f"📌 Close: {pred_close:.2f}")

# Predict on test set for visualization
y_pred = model.predict(X_test)

# Plotting Actual vs Predicted Close Prices
plt.figure(figsize=(12, 6))
plt.plot(y_test.index, y_test['Close'], label='Actual Close Price', color='blue', marker='o')
plt.plot(y_test.index, y_pred[:, 3], label='Predicted Close Price', color='red', linestyle='dashed', marker='x')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.title('Actual vs Predicted Close Prices for HDFC Bank')
plt.legend()
plt.xticks(rotation=45)
plt.grid()
plt.tight_layout()
plt.show()


In [4]:
pip install yfinance scikit-learn pandas matplotlib

Note: you may need to restart the kernel to use updated packages.


In [11]:
pip install investpy


Collecting investpy
  Downloading investpy-1.0.8.tar.gz (4.4 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.4/4.4 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m[31m10.4 MB/s[0m eta [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting Unidecode>=1.1.1 (from investpy)
  Downloading Unidecode-1.4.0-py3-none-any.whl.metadata (13 kB)
Downloading Unidecode-1.4.0-py3-none-any.whl (235 kB)
Building wheels for collected packages: investpy
  Building wheel for investpy (setup.py) ... [?25ldone
[?25h  Created wheel for investpy: filename=investpy-1.0.8-py3-none-any.whl size=4481564 sha256=999ad29c664494f8eef92512c09bc0bfe5d8e69d35c1495be2ea2fde695bf05b
  Stored in directory: /home/bhavya/.cache/pip/wheels/dd/f4/e0/4807b8928ec08b376e1e1f1e1cc597a747cdd8356c37ee618f
Successfully built investpy
Installing collected packages: Unidecode, investpy
Successfully installed Unidecode-1.4.0 investpy-1.0.8
Note: you may need to restart t

In [13]:
import investpy

# Example: Reliance Industries from NSE
data = investpy.get_stock_historical_data(stock='RELIANCE',
                                          country='india',
                                          from_date='01/01/2023',
                                          to_date='01/01/2024')
print(data.head())

RuntimeError: ERR#0018: stock reliance not found, check if it is correct.

In [19]:
pip install pandas_datareader scikit-learn matplotlib


Note: you may need to restart the kernel to use updated packages.


In [21]:
import pandas_datareader.data as web
import pandas as pd
from datetime import datetime
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Step 1: Fetch data from Yahoo Finance
start = datetime(2023, 1, 1)
end = datetime(2024, 1, 1)

symbol = 'AAPL'  # You can change to MSFT, TSLA, INFY.NS, etc.

try:
    df = web.DataReader(symbol, 'yahoo', start, end)
except Exception as e:
    print(f"❌ Failed to fetch data: {e}")
    exit()

# Step 2: Prepare data
df = df[['Close']]
df['Target'] = df['Close'].shift(-1)
df.dropna(inplace=True)

# Step 3: Split data
X = df[['Close']]
y = df['Target']
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2)

# Step 4: Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Step 5: Predict
predicted = model.predict(X_test)

# Step 6: Plot results
plt.figure(figsize=(10, 5))
plt.plot(y_test.values, label='Actual Price', marker='o')
plt.plot(predicted, label='Predicted Price', marker='x')
plt.title(f'{symbol} Stock Price Prediction')
plt.xlabel('Days')
plt.ylabel('Price')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


❌ Failed to fetch data: Unable to read URL: https://finance.yahoo.com/quote/AAPL/history?period1=1672525800&period2=1704148199&interval=1d&frequency=1d&filter=history
Response Text:
b'<html><meta charset=\'utf-8\'>\n<script>\nif(window != window.top){\ndocument.write(\'<p>Content is currently unavailable.</p><img src="//geo.yahoo.com/p?s=1197757039&t=\'\n    + new Date().getTime() + \'&_R=\'\n    + encodeURIComponent(document.referrer)\n    + \'&err=404\'\n    + \'" width="0px" height="0px"/>\');\n}else{\nwindow.location.replace(\'https://\' + window.location.host + \'/?err=404\');\n}\n</script>\n<noscript>\n<META http-equiv="refresh" content="0;URL=https://finance.yahoo.com/?err=404">\n</noscript></html>'


ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [2]:
pip install yahoo_fin

Collecting yahoo_fin
  Downloading yahoo_fin-0.8.9.1-py3-none-any.whl.metadata (699 bytes)
Collecting requests-html (from yahoo_fin)
  Downloading requests_html-0.10.0-py3-none-any.whl.metadata (15 kB)
Collecting feedparser (from yahoo_fin)
  Downloading feedparser-6.0.11-py3-none-any.whl.metadata (2.4 kB)
Collecting sgmllib3k (from feedparser->yahoo_fin)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting pyquery (from requests-html->yahoo_fin)
  Downloading pyquery-2.0.1-py3-none-any.whl.metadata (9.0 kB)
Collecting fake-useragent (from requests-html->yahoo_fin)
  Downloading fake_useragent-2.2.0-py3-none-any.whl.metadata (17 kB)
Collecting parse (from requests-html->yahoo_fin)
  Downloading parse-1.20.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting w3lib (from requests-html->yahoo_fin)
  Downloading w3lib-2.3.1-py3-none-any.whl.metadata (2.3 kB)
Collecting pyppeteer>=0.0.14 (from requests-html->yahoo_fin)
  Downloading pyppeteer

In [2]:
import yfinance as yf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import sys

# 1. Download historical stock data
ticker_symbol = 'AAPL'
start_date = '2020-01-01'
end_date = '2025-01-01'

try:
    stock_data = yf.download(ticker_symbol, start=start_date, end=end_date)
except Exception as e:
    print(f"Error downloading data: {e}")
    sys.exit()

if stock_data.empty:
    print("No data downloaded. Please check the ticker symbol and date range.")
    sys.exit()

# 2. Prepare the data
stock_data['Previous_Close'] = stock_data['Close'].shift(1)
stock_data.dropna(inplace=True)

# Check if there's enough data after dropping NaN values
if stock_data.empty:
    print("Not enough data to train the model after preparing the dataset.")
    sys.exit()

X = stock_data[['Previous_Close']]
y = stock_data['Close']

# 3. Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Create and train the model
model = LinearRegression()
model.fit(X_train, y_train)

# 5. Make predictions
y_pred = model.predict(X_test)

# 6. Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error (MSE): {mse}")

# 7. Make a prediction for the next day (hypothetically)
last_close_price = stock_data['Close'].iloc[-1]
next_day_prediction = model.predict([[last_close_price]])

print(f"\nLast known closing price: ${last_close_price:.2f}")
print(f"Predicted closing price for the next day: ${next_day_prediction[0]:.2f}")

[*********************100%***********************]  1 of 1 completed

1 Failed download:
['AAPL']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


No data downloaded. Please check the ticker symbol and date range.


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [10]:
print('57BBTL11W16DSJNW')

57BBTL11W16DSJNW


In [12]:
pip install alpha_vantage

Collecting alpha_vantage
  Downloading alpha_vantage-3.0.0-py3-none-any.whl.metadata (12 kB)
Downloading alpha_vantage-3.0.0-py3-none-any.whl (35 kB)
Installing collected packages: alpha_vantage
Successfully installed alpha_vantage-3.0.0
Note: you may need to restart the kernel to use updated packages.


In [14]:
import pandas as pd
from alpha_vantage.timeseries import TimeSeries
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import sys


API_KEY = '57BBTL11W16DSJNW'
ticker_symbol = 'MSFT' # Using MSFT as an example
output_format = 'pandas'

# 2. Download historical stock data
try:
    ts = TimeSeries(key=API_KEY, output_format=output_format)
    # The 'full' output size gives you up to 20 years of data
    stock_data, meta_data = ts.get_daily(symbol=ticker_symbol, outputsize='full')
    
except Exception as e:
    print(f"Error downloading data from Alpha Vantage: {e}")
    print("Please check your API key, ticker symbol, or network connection.")
    sys.exit()

if stock_data.empty:
    print("No data downloaded. Please check the ticker symbol and date range.")
    sys.exit()

# 3. Clean and prepare the data
# The columns in the Alpha Vantage data are slightly different
# We rename them for consistency
stock_data.rename(columns={'4. close': 'Close'}, inplace=True)
stock_data.sort_index(inplace=True)

# We'll use the 'Close' price as our target variable (y)
# and the previous day's 'Close' price as our feature (X).
stock_data['Previous_Close'] = stock_data['Close'].shift(1)
stock_data.dropna(inplace=True)

# Check if there's enough data after dropping NaN values
if stock_data.empty:
    print("Not enough data to train the model after preparing the dataset.")
    sys.exit()

X = stock_data[['Previous_Close']]
y = stock_data['Close']

# 4. Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5. Create and train the model
model = LinearRegression()
model.fit(X_train, y_train)

# 6. Make predictions
y_pred = model.predict(X_test)

# 7. Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error (MSE): {mse}")

# 8. Make a prediction for the next day (hypothetically)
last_close_price = stock_data['Close'].iloc[-1]
next_day_prediction = model.predict([[last_close_price]])

print(f"\nLast known closing price: ${last_close_price:.2f}")
print(f"Predicted closing price for the next day: ${next_day_prediction[0]:.2f}")

Mean Squared Error (MSE): 6.197198261588007

Last known closing price: $533.50
Predicted closing price for the next day: $533.98




In [16]:
import pandas as pd
from alpha_vantage.timeseries import TimeSeries
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import sys

API_KEY = '57BBTL11W16DSJNW'
ticker_symbol = 'INFY'
output_format = 'pandas'

try:
    ts = TimeSeries(key=API_KEY, output_format=output_format)
    stock_data, meta_data = ts.get_daily(symbol=ticker_symbol, outputsize='full')
    
except Exception as e:
    print(f"Error downloading data from Alpha Vantage: {e}")
    print("Please check your API key, ticker symbol, or network connection.")
    sys.exit()

if stock_data.empty:
    print("No data downloaded. Please check the ticker symbol and date range.")
    sys.exit()

stock_data.rename(columns={'4. close': 'Close'}, inplace=True)
stock_data.sort_index(inplace=True)

stock_data['Previous_Close'] = stock_data['Close'].shift(1)
stock_data.dropna(inplace=True)

if stock_data.empty:
    print("Not enough data to train the model after preparing the dataset.")
    sys.exit()

X = stock_data[['Previous_Close']]
y = stock_data['Close']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error (MSE): {mse}")

last_close_price = stock_data['Close'].iloc[-1]
next_day_prediction = model.predict([[last_close_price]])

print(f"\nLast known closing price: ${last_close_price:.2f}")
print(f"Predicted closing price for the next day: ${next_day_prediction[0]:.2f}")

comparison_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
comparison_df.sort_index(inplace=True)
print("\nPredicted vs Actual Prices:")
print(comparison_df.tail())

plt.figure(figsize=(14, 7))
plt.plot(comparison_df.index, comparison_df['Actual'], label='Actual Price', color='blue', linewidth=2)
plt.plot(comparison_df.index, comparison_df['Predicted'], label='Predicted Price', color='red', linestyle='--', linewidth=2)
plt.title(f'{ticker_symbol} Stock Price Prediction (Last 20%)')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.grid(True)
plt.show()


Error downloading data from Alpha Vantage: HTTPSConnectionPool(host='www.alphavantage.co', port=443): Max retries exceeded with url: /query?function=TIME_SERIES_DAILY&symbol=INFY&outputsize=full&apikey=57BBTL11W16DSJNW&datatype=json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x72937969eed0>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution'))
Please check your API key, ticker symbol, or network connection.


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [32]:
import pandas as pd
import requests
import json
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import sys

API_KEY = '57BBTL11W16DSJNW'
ticker_symbol = 'HDFCBANK'

url = f'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol={ticker_symbol}&outputsize=full&apikey={API_KEY}&datatype=json'
response = requests.get(url)
response.raise_for_status()
data = json.loads(response.text)

if "Time Series (Daily)" not in data:
    print("Error: No data found for the ticker symbol. Please check if the symbol is correct.")
    print(data)
    sys.exit()

stock_data = pd.DataFrame.from_dict(data["Time Series (Daily)"], orient="index")
stock_data.index = pd.to_datetime(stock_data.index)
stock_data = stock_data.astype(float)

if stock_data.empty:
    print("No data downloaded. Please check the ticker symbol.")
    sys.exit()

stock_data.rename(columns={'4. close': 'Close'}, inplace=True)
stock_data.sort_index(inplace=True)

stock_data['Previous_Close'] = stock_data['Close'].shift(1)
stock_data.dropna(inplace=True)

if stock_data.empty:
    print("Not enough data to train the model after preparing the dataset.")
    sys.exit()

X = stock_data[['Previous_Close']]
y = stock_data['Close']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error (MSE): {mse}")

last_close_price = stock_data['Close'].iloc[-1]
next_day_prediction = model.predict([[last_close_price]])

print(f"\nLast known closing price: {last_close_price:.2f} Rupees")
print(f"Predicted closing price for the next day: {next_day_prediction[0]:.2f} Rupees")

comparison_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
comparison_df.sort_index(inplace=True)
print("\nPredicted vs Actual Prices:")
print(comparison_df.tail())

plt.figure(figsize=(14, 7))
plt.plot(comparison_df.index, comparison_df['Actual'], label='Actual Price', color='blue', linewidth=2)
plt.plot(comparison_df.index, comparison_df['Predicted'], label='Predicted Price', color='red', linestyle='--', linewidth=2)
plt.title(f'{ticker_symbol} Stock Price Prediction (Last 20%)')
plt.xlabel('Date')
plt.ylabel('Price in Rupees')
plt.legend()
plt.grid(True)
plt.show()


Error: No data found for the ticker symbol. Please check if the symbol is correct.
{'Error Message': 'Invalid API call. Please retry or visit the documentation (https://www.alphavantage.co/documentation/) for TIME_SERIES_DAILY.'}


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
