In [2]:
# ---------------------------------------------------------
# Task 02: Download, Clean, Resample, and Save Stock Data
# ---------------------------------------------------------
# Requirements:
# pip install yfinance pandas
# ---------------------------------------------------------

import yfinance as yf
import pandas as pd
import os

# -------------------- CONSTANTS ---------------------------
# Column layout of the cleaned / resampled frame and of the saved CSV.
OUTPUT_COLUMNS = ["date", "time", "open", "high", "low", "close", "volume"]

# How each price/volume column is combined when several bars collapse into one.
OHLCV_AGGREGATION = {
    "open": "first",
    "high": "max",
    "low": "min",
    "close": "last",
    "volume": "sum",
}

# yfinance interval suffixes -> pandas offset aliases.
# pandas does NOT read "m" as minutes (it is a month alias), and "wk"/"mo"
# are not pandas aliases at all, so the strings the prompts advertise
# ("15m", "1h", "1wk", ...) must be translated before calling resample().
YF_UNIT_TO_PANDAS = {
    "m": "min",   # minutes
    "h": "h",     # hours
    "d": "D",     # calendar days
    "wk": "W",    # weeks
    "mo": "MS",   # months, labelled at month start
}


# -------------------- HELPERS -----------------------------
def abort(message):
    """Print *message* and stop the script with a non-zero exit status.

    Raises:
        SystemExit: always, with exit code 1.
    """
    print(message)
    raise SystemExit(1)


def parse_date_range(start_text, end_text):
    """Parse two ``YYYY-MM-DD`` strings and check they form a usable range.

    Args:
        start_text: start date as typed by the user.
        end_text: end date as typed by the user.

    Returns:
        ``(start, end)`` as pandas timestamps.

    Raises:
        ValueError: if either string is not a ``YYYY-MM-DD`` date, or if the
            end date is not later than the start date.
    """
    try:
        start = pd.to_datetime(start_text, format="%Y-%m-%d")
        end = pd.to_datetime(end_text, format="%Y-%m-%d")
    except (ValueError, TypeError) as err:
        raise ValueError("❌ Invalid date format! Use YYYY-MM-DD.") from err

    if pd.isna(start) or pd.isna(end):
        raise ValueError("❌ Invalid date format! Use YYYY-MM-DD.")
    # An empty or reversed window can never return rows, so reject it early.
    if end <= start:
        raise ValueError("❌ End date must be after the start date.")
    return start, end


def to_pandas_freq(timeframe):
    """Translate a yfinance-style interval into a pandas resample rule.

    ``"15m"`` becomes ``"15min"``, ``"1wk"`` becomes ``"1W"``, and so on.
    Anything whose unit is not a known yfinance suffix (for example
    ``"15min"`` or ``"W-MON"``) is returned unchanged so native pandas
    aliases keep working.

    Args:
        timeframe: interval text such as ``"5m"``, ``"1h"``, ``"5d"``.

    Returns:
        A string suitable for ``DataFrame.resample``.
    """
    text = timeframe.strip()
    # Split the leading multiplier ("15") from the unit suffix ("m").
    split_at = len(text) - len(text.lstrip("0123456789"))
    count, unit = text[:split_at], text[split_at:]
    alias = YF_UNIT_TO_PANDAS.get(unit)
    if alias is None:
        return text
    return f"{count}{alias}"


def clean_ohlcv(raw):
    """Return a tidy OHLCV frame built from a raw price download.

    Rows containing missing values are dropped, the index is coerced to
    datetimes, ``date`` / ``time`` columns are derived from it, and the
    price columns are renamed to lower case in ``OUTPUT_COLUMNS`` order.

    Args:
        raw: DataFrame indexed by timestamp with at least the columns
            ``Open``, ``High``, ``Low``, ``Close`` and ``Volume``.

    Returns:
        A new DataFrame; *raw* is not modified.
    """
    frame = raw.dropna()
    # Two-level (field, ticker) column headers are collapsed to the field
    # name so the plain label selection below is unambiguous.
    # NOTE(review): assumes level 0 holds the field names - confirm for the
    # installed yfinance version.
    if isinstance(frame.columns, pd.MultiIndex):
        frame.columns = frame.columns.get_level_values(0)

    frame.index = pd.to_datetime(frame.index)
    frame["date"] = frame.index.date
    frame["time"] = frame.index.time

    frame = frame[["date", "time", "Open", "High", "Low", "Close", "Volume"]]
    frame.columns = OUTPUT_COLUMNS
    return frame


def resample_ohlcv(frame, rule):
    """Aggregate a cleaned OHLCV frame into bars of size *rule*.

    Args:
        frame: DataFrame with the ``OUTPUT_COLUMNS`` layout.
        rule: pandas resample rule (see ``to_pandas_freq``).

    Returns:
        A new DataFrame in ``OUTPUT_COLUMNS`` layout, indexed by bar start;
        bins that contain no source rows are dropped.

    Raises:
        ValueError: if pandas does not recognise *rule*.
    """
    bars = frame.copy()
    # Rebuild a datetime index from the wall-clock date/time columns.
    bars.index = pd.to_datetime(
        bars["date"].astype(str) + " " + bars["time"].astype(str)
    )

    out = bars.resample(rule).agg(OHLCV_AGGREGATION).dropna()
    out["date"] = out.index.date
    out["time"] = out.index.time
    return out[OUTPUT_COLUMNS]


# The interactive flow stays at module level (inside the guard rather than in
# a function) so that `symbol`, `data`, `resampled`, ... remain ordinary
# globals when this runs as a script or a notebook cell, while importing the
# file no longer triggers prompts or network access.
if __name__ == "__main__":
    # -------------------- USER INPUTS -------------------------
    symbol = input("Enter Stock Symbol (e.g., SBIN.NS, RELIANCE.NS): ").strip()
    start_date = input("Enter Start Date (YYYY-MM-DD): ").strip()
    end_date = input("Enter End Date (YYYY-MM-DD): ").strip()

    print("\nAvailable timeframes: 1m, 2m, 5m, 15m, 30m, 1h, 1d, 1wk, 1mo")
    timeframe = input("Enter Timeframe to DOWNLOAD (e.g., 5m, 15m, 1h, 1d): ").strip()

    # -------------------- VALIDATION --------------------------
    if not (symbol and start_date and end_date and timeframe):
        abort("❌ All inputs are required! Please try again.")

    try:
        parse_date_range(start_date, end_date)
    except ValueError as err:
        abort(str(err))

    # -------------------- DOWNLOAD DATA -----------------------
    print(f"\n📥 Downloading data for {symbol} from {start_date} to {end_date} ({timeframe})...")

    try:
        data = yf.download(symbol, start=start_date, end=end_date, interval=timeframe, progress=False)
    except Exception as e:  # top-level boundary: report any download failure and stop
        abort(f"❌ Error downloading data: {e}")

    if data is None or data.empty:
        abort("⚠️ No data found for the given symbol/date range/timeframe.")

    # -------------------- CLEAN DATA --------------------------
    data = clean_ohlcv(data)

    # -------------------- ASK FOR RESAMPLING ------------------
    print("\nDo you want to RESAMPLE the data to a different timeframe?")
    print("Example: If you downloaded 5m data, you can resample it to 15m, 1h, 1d, etc.")
    resample_choice = input("Enter new timeframe to resample (or press Enter to skip): ").strip()

    if resample_choice:
        print(f"\n⏱️ Resampling data to {resample_choice} timeframe...")
        try:
            resampled = resample_ohlcv(data, to_pandas_freq(resample_choice))
        except ValueError as err:
            abort(f"❌ Invalid resample timeframe '{resample_choice}': {err}")
    else:
        resampled = data.copy()
        print("\n⏩ Skipped resampling — keeping original timeframe.")

    # -------------------- SAVE TO CSV -------------------------
    folder = "data"
    os.makedirs(folder, exist_ok=True)

    # The file is named after the timeframe the saved rows actually have.
    filename = f"{folder}/{symbol}_{resample_choice or timeframe}.csv"
    resampled.to_csv(filename, index=False)

    print("\n✅ Data downloaded, cleaned, and saved successfully!")
    print(f"📁 File saved as: {os.path.abspath(filename)}")

    print("\nSample data preview:\n")
    print(resampled.head())


Enter Stock Symbol (e.g., SBIN.NS, RELIANCE.NS):  SBIN.NS
Enter Start Date (YYYY-MM-DD):  2024-01-10
Enter End Date (YYYY-MM-DD):  2024-01-20



Available timeframes: 1m, 2m, 5m, 15m, 30m, 1h, 1d, 1wk, 1mo


Enter Timeframe to DOWNLOAD (e.g., 5m, 15m, 1h, 1d):  1d



📥 Downloading data for SBIN.NS from 2024-01-10 to 2024-01-20 (1d)...


  data = yf.download(symbol, start=start_date, end=end_date, interval=timeframe, progress=False)



Do you want to RESAMPLE the data to a different timeframe?
Example: If you downloaded 5m data, you can resample it to 15m, 1h, 1d, etc.


Enter new timeframe to resample (or press Enter to skip):  5d



⏱️ Resampling data to 5d timeframe...

✅ Data downloaded, cleaned, and saved successfully!
📁 File saved as: C:\Users\Hetvi\data\SBIN.NS_5d.csv

Sample data preview:

                  date      time        open        high         low  \
2024-01-10  2024-01-10  00:00:00  602.399194  613.679718  596.035846   
2024-01-15  2024-01-15  00:00:00  615.993705  622.357053  596.855320   

                 close    volume  
2024-01-10  610.883667  44897121  
2024-01-15  605.243469  88256507  
