In [2]:
pip install yfinance pandas numpy matplotlib seaborn sqlalchemy psycopg2-binary openpyxl

Note: you may need to restart the kernel to use updated packages.


In [2]:
import yfinance as yf
import pandas as pd
from datetime import datetime
import time

stocks = {
    'TCS.NS': 'IT', 'INFY.NS': 'IT', 'WIPRO.NS': 'IT',
    'HCLTECH.NS': 'IT', 'TECHM.NS': 'IT',
    'HDFCBANK.NS': 'Banking', 'ICICIBANK.NS': 'Banking',
    'KOTAKBANK.NS': 'Banking', 'AXISBANK.NS': 'Banking',
    'SBIN.NS': 'Banking', 'BAJFINANCE.NS': 'Banking',
    'BAJAJFINSV.NS': 'Banking',
    'HINDUNILVR.NS': 'FMCG', 'ITC.NS': 'FMCG',
    'NESTLEIND.NS': 'FMCG', 'BRITANNIA.NS': 'FMCG',
    'SUNPHARMA.NS': 'Pharma', 'DRREDDY.NS': 'Pharma',
    'CIPLA.NS': 'Pharma', 'DIVISLAB.NS': 'Pharma',
    'RELIANCE.NS': 'Energy', 'ONGC.NS': 'Energy',
    'POWERGRID.NS': 'Energy', 'NTPC.NS': 'Energy',
    'COALINDIA.NS': 'Energy', 'BPCL.NS': 'Energy',
    'MARUTI.NS': 'Auto', 'M&M.NS': 'Auto',
    'BAJAJ-AUTO.NS': 'Auto', 'HEROMOTOCO.NS': 'Auto', 
    'EICHERMOT.NS': 'Auto',
    'TATASTEEL.NS': 'Metals', 'HINDALCO.NS': 'Metals',
    'JSWSTEEL.NS': 'Metals',
    'BHARTIARTL.NS': 'Telecom',
    'ASIANPAINT.NS': 'Consumer', 'ULTRACEMCO.NS': 'Consumer',
    'TITAN.NS': 'Consumer', 'ADANIENT.NS': 'Consumer',
    'ADANIPORTS.NS': 'Consumer',
    'LT.NS': 'Infrastructure', 'GRASIM.NS': 'Infrastructure',
    'INDUSINDBK.NS': 'Banking', 'APOLLOHOSP.NS': 'Healthcare',
    'SBILIFE.NS': 'Finance', 'HDFCLIFE.NS': 'Finance',
    'TATACONSUM.NS': 'FMCG', 'SHRIRAMFIN.NS': 'Finance'
}

start = "2021-01-01"
end = datetime.now().strftime('%Y-%m-%d')

data_list = []
failed = []

print(f"Downloading {len(stocks)} stocks...")
print("-" * 70)

for ticker, sector in stocks.items():
    try:
        df = yf.download(ticker, start=start, end=end, auto_adjust=False, progress=False)
        
        if df.empty:
            print(f"× {ticker:20} - no data")
            failed.append(ticker)
            continue
        
        df.reset_index(inplace=True)
        
        # fix column names if MultiIndex
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(0)
        
        df['ticker'] = ticker
        df['sector'] = sector
        df['company'] = ticker.replace('.NS', '')
        
        data_list.append(df)
        print(f"✓ {ticker:20} - {len(df)} rows")
        
        time.sleep(0.05)
        
    except Exception as e:
        print(f"× {ticker:20} - {str(e)[:50]}")
        failed.append(ticker)

print("-" * 70)

if not data_list:
    print("No data collected!")
    raise SystemExit

# combine
full_data = pd.concat(data_list, ignore_index=True)

# standardize column names
full_data.columns = [col.lower().replace(' ', '_') for col in full_data.columns]

# date column
full_data['date'] = pd.to_datetime(full_data['date'])

# sort data
full_data.sort_values(['ticker', 'date'], inplace=True)
full_data.reset_index(drop=True, inplace=True)

print(f"\nDownload complete!")
print(f"Records: {len(full_data):,}")
print(f"Stocks: {full_data['ticker'].nunique()}")
print(f"Dates: {full_data['date'].min().date()} to {full_data['date'].max().date()}")
print(f"Columns: {list(full_data.columns)}")

if failed:
    print(f"\nSkipped ({len(failed)}): {', '.join(failed)}")

# save
full_data.to_csv('../data/nifty50_raw.csv', index=False)
print(f"\nSaved: ../data/nifty50_raw.csv")
print(f"Shape: {full_data.shape[0]} rows × {full_data.shape[1]} cols")

print("\nSample data:")
print(full_data.head(3))
print("\nData types:")
print(full_data.dtypes)

Downloading 48 stocks...
----------------------------------------------------------------------
✓ TCS.NS               - 1270 rows
✓ INFY.NS              - 1270 rows
✓ WIPRO.NS             - 1270 rows
✓ HCLTECH.NS           - 1270 rows
✓ TECHM.NS             - 1270 rows
✓ HDFCBANK.NS          - 1270 rows
✓ ICICIBANK.NS         - 1270 rows
✓ KOTAKBANK.NS         - 1270 rows
✓ AXISBANK.NS          - 1270 rows
✓ SBIN.NS              - 1270 rows
✓ BAJFINANCE.NS        - 1270 rows
✓ BAJAJFINSV.NS        - 1270 rows
✓ HINDUNILVR.NS        - 1270 rows
✓ ITC.NS               - 1270 rows
✓ NESTLEIND.NS         - 1270 rows
✓ BRITANNIA.NS         - 1270 rows
✓ SUNPHARMA.NS         - 1270 rows
✓ DRREDDY.NS           - 1270 rows
✓ CIPLA.NS             - 1270 rows
✓ DIVISLAB.NS          - 1270 rows
✓ RELIANCE.NS          - 1270 rows
✓ ONGC.NS              - 1270 rows
✓ POWERGRID.NS         - 1270 rows
✓ NTPC.NS              - 1270 rows
✓ COALINDIA.NS         - 1270 rows
✓ BPCL.NS              - 1270

In [4]:
# Check what columns we have
print("Current columns:", df.columns.tolist())
print("\nFirst few rows:")
print(df.head(3))

# Rename columns to clean names
df = df.rename(columns={
    'Date': 'date',
    'Open': 'open',
    'High': 'high',
    'Low': 'low',
    'Close': 'close',
    'Adj Close': 'adj_close',
    'Volume': 'volume'
})

# Convert date to proper datetime
df['date'] = pd.to_datetime(df['date'])

# Sort by ticker and date
df = df.sort_values(['ticker', 'date']).reset_index(drop=True)

# Save to CSV
df.to_csv('../data/nifty50_raw.csv', index=False)

print("\n✅ Data saved to: ../data/nifty50_raw.csv")
print(f"✅ Shape: {df.shape[0]} rows × {df.shape[1]} columns")
print(f"\nColumn names: {list(df.columns)}")
print(f"\nData types:\n{df.dtypes}")

Current columns: [('Date', ''), ('Close', 'TCS.NS'), ('High', 'TCS.NS'), ('Low', 'TCS.NS'), ('Open', 'TCS.NS'), ('Volume', 'TCS.NS'), ('ticker', ''), ('sector', ''), ('company', ''), ('Close', 'INFY.NS'), ('High', 'INFY.NS'), ('Low', 'INFY.NS'), ('Open', 'INFY.NS'), ('Volume', 'INFY.NS'), ('Close', 'WIPRO.NS'), ('High', 'WIPRO.NS'), ('Low', 'WIPRO.NS'), ('Open', 'WIPRO.NS'), ('Volume', 'WIPRO.NS'), ('Close', 'HCLTECH.NS'), ('High', 'HCLTECH.NS'), ('Low', 'HCLTECH.NS'), ('Open', 'HCLTECH.NS'), ('Volume', 'HCLTECH.NS'), ('Close', 'TECHM.NS'), ('High', 'TECHM.NS'), ('Low', 'TECHM.NS'), ('Open', 'TECHM.NS'), ('Volume', 'TECHM.NS'), ('Close', 'HDFCBANK.NS'), ('High', 'HDFCBANK.NS'), ('Low', 'HDFCBANK.NS'), ('Open', 'HDFCBANK.NS'), ('Volume', 'HDFCBANK.NS'), ('Close', 'ICICIBANK.NS'), ('High', 'ICICIBANK.NS'), ('Low', 'ICICIBANK.NS'), ('Open', 'ICICIBANK.NS'), ('Volume', 'ICICIBANK.NS'), ('Close', 'KOTAKBANK.NS'), ('High', 'KOTAKBANK.NS'), ('Low', 'KOTAKBANK.NS'), ('Open', 'KOTAKBANK.NS'), (

In [4]:
from sqlalchemy import create_engine
from urllib.parse import quote_plus
import pandas as pd

df = pd.read_csv('../data/nifty50_raw.csv')
df['date'] = pd.to_datetime(df['date'])

print(f"CSV loaded: {len(df):,} rows, {df['ticker'].nunique()} stocks")

# YOUR PASSWORD - put it here exactly as it is
my_password = "akhi@333"  # ← REPLACE WITH YOUR ACTUAL PASSWORD

# this encodes special characters like @ so they work in the URL
safe_password = quote_plus(my_password)

# build connection string
conn_str = f'postgresql://postgres:{safe_password}@localhost:5432/market_db'
engine = create_engine(conn_str)

# test connection
try:
    pd.read_sql("SELECT 1", engine)
    print("✓ Database connected")
except Exception as e:
    print(f"× Failed: {e}")
    print("\nMake sure:")
    print("1. PostgreSQL is running")
    print("2. market_db database exists")
    print("3. Password is correct")
    raise

# upload data
print("\nUploading to database...")
df.to_sql('stock_prices', engine, if_exists='replace', index=False, chunksize=1000)
print("✓ Done!")

# check what got loaded
check = pd.read_sql("""
    SELECT COUNT(*) as rows,
           COUNT(DISTINCT ticker) as stocks,
           MIN(date)::date as first_date,
           MAX(date)::date as last_date
    FROM stock_prices
""", engine)

print("\nDatabase verification:")
print(check)

CSV loaded: 60,960 rows, 48 stocks
✓ Database connected

Uploading to database...
✓ Done!

Database verification:
    rows  stocks  first_date   last_date
0  60960      48  2021-01-01  2026-02-18
