In [62]:
import numpy as np
import pandas as pd
import glob
from tqdm import tqdm
import os

In [53]:
# Per-asset-class lists of CSV paths.
# glob's result order depends on the filesystem, so each list is sorted to keep
# index-based lookups (e.g. NASDAQ200[0]) and the row order of the generated
# info files reproducible across machines and runs.
TICKERS = sorted(glob.glob("Data/Ticker_Data/*.csv"))
INDIAN_BONDS = sorted(glob.glob("Data/Bonds/Indian/*.csv"))
US_BONDS = sorted(glob.glob("Data/Bonds/US/*.csv"))
COMMODITIES = sorted(glob.glob("Data/Commodities/*.csv"))
CRYPTO = sorted(glob.glob("Data/Crypto/*.csv"))
CURRENCIES = sorted(glob.glob("Data/Currencies/*.csv"))
NASDAQ200 = sorted(glob.glob("Data/Stocks/nasdaq200/*.csv"))
NIFTY200 = sorted(glob.glob("Data/Stocks/nifty200/*.csv"))
INDICES = sorted(glob.glob("Data/Indices/*.csv"))

# Asset-class labels. save() upper-cases a label to find the matching list above
# and lower-cases it to build the output file name.
# NOTE(review): TICKERS has no label here, so no info file is produced for it —
# confirm this is intentional.
ALL_STRING = ["Indian_Bonds", "US_Bonds", "Commodities", "Crypto", "Currencies", "NASDAQ200", "NIFTY200", "Indices"]

# Report how many files were found per asset class (output text unchanged).
for _label, _paths in [
    ("INDIAN_BONDS", INDIAN_BONDS),
    ("US_BONDS", US_BONDS),
    ("Commodities", COMMODITIES),
    ("Crypto", CRYPTO),
    ("Currencies", CURRENCIES),
    ("NASDAQ200", NASDAQ200),
    ("NIFTY200", NIFTY200),
    ("Indices", INDICES),
]:
    print(f"Number of {_label}: ", len(_paths))

Number of INDIAN_BONDS:  9
Number of US_BONDS:  4
Number of Commodities:  14
Number of Crypto:  20
Number of Currencies:  26
Number of NASDAQ200:  200
Number of NIFTY200:  200
Number of Indices:  24


In [20]:
# Peek at the first NASDAQ200 file to inspect the raw column layout.
# The CSV carries its saved index as an extra unnamed first column.
sample_stock = pd.read_csv(filepath_or_buffer=NASDAQ200[0])
sample_stock.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1980-12-12 00:00:00-05:00,0.099874,0.100308,0.099874,0.099874,469033600
1,1980-12-15 00:00:00-05:00,0.095098,0.095098,0.094663,0.094663,175884800
2,1980-12-16 00:00:00-05:00,0.088149,0.088149,0.087715,0.087715,105728000
3,1980-12-17 00:00:00-05:00,0.089886,0.09032,0.089886,0.089886,86441600
4,1980-12-18 00:00:00-05:00,0.092492,0.092927,0.092492,0.092492,73449600


In [21]:
# Peek at the first index file; its Date column is a plain date without a
# UTC offset, unlike the stock sample.
sample_index = pd.read_csv(filepath_or_buffer=INDICES[0])
sample_index.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1984-08-03,706.200012,718.0,706.200012,713.0,0.0
1,1984-08-06,714.900024,736.799988,714.900024,736.799988,0.0
2,1984-08-07,736.799988,736.799988,728.0,728.5,0.0
3,1984-08-08,728.0,730.799988,724.5,728.5,0.0
4,1984-08-15,727.599976,735.099976,726.799988,735.099976,0.0


In [57]:
def get_info(file):
    """Summarise one OHLC price CSV.

    Parameters
    ----------
    file : str
        Path to a CSV with at least the columns ``Date``, ``High``, ``Low``
        and ``Close``.

    Returns
    -------
    list
        ``[ticker, length, start_date, end_date, high, low, last]`` where
        ``ticker`` is the file name up to its first dot, ``length`` is the row
        count, the dates are ``YYYY-MM-DD`` strings of the earliest and latest
        row, ``high``/``low`` are the extremes of the ``High``/``Low`` columns
        and ``last`` is the ``Close`` of the latest row.
        A list of seven ``None`` values is returned when the file holds at
        most one data row.
    """
    df = pd.read_csv(file)
    if len(df) <= 1:
        # Too little data to summarise; the caller gets an all-empty row.
        return [None] * 7

    # basename() honours the platform's path separator; splitting on "/" left
    # the directory in the ticker for paths joined with "\\".
    # Everything after the first dot is dropped, as before.
    ticker = os.path.basename(file).split(".")[0]

    # Only the calendar date is needed. Parsing the leading YYYY-MM-DD part
    # avoids mixed UTC offsets (e.g. -05:00 / -04:00 across DST) inside one
    # column, while yielding the same local dates.
    # Assumes ISO-style date strings, as in the sampled files — TODO confirm
    # for every data source.
    local_day = df["Date"].astype(str).str.slice(0, 10)
    df["Date"] = pd.to_datetime(local_day)
    df = df.sort_values(by="Date", ascending=True)

    high = df["High"].max()
    low = df["Low"].min()
    last = df["Close"].iloc[-1]
    length = len(df)
    start_date = df["Date"].iloc[0].strftime("%Y-%m-%d")
    end_date = df["Date"].iloc[-1].strftime("%Y-%m-%d")
    return [ticker, length, start_date, end_date, high, low, last]

In [50]:
def save(name):
    """Write the per-file summary table for one asset class to disk.

    Parameters
    ----------
    name : str
        Asset-class label such as ``"NIFTY200"`` or ``"Indian_Bonds"``. Its
        upper-cased form must be the name of a module-level list of CSV paths;
        its lower-cased form is used in the output file name.

    Raises
    ------
    NameError
        If no module-level variable with the upper-cased name exists.

    Notes
    -----
    One row per input file is written to ``Data/Info/<name>_info.csv``. Files
    that ``get_info`` cannot summarise show up as rows of empty values.
    """
    key = name.upper()
    # Look the list up by name rather than evaluating the string as code.
    try:
        files = globals()[key]
    except KeyError:
        raise NameError(f"name '{key}' is not defined") from None

    rows = [get_info(file) for file in tqdm(files, desc="Saving Info")]
    info = pd.DataFrame(
        rows,
        columns=["Ticker", "Length", "Start Date", "End Date", "Highest", "Lowest", "Last Price"],
    )

    # Make sure the target folder exists so the first run does not fail.
    os.makedirs("Data/Info", exist_ok=True)
    info.to_csv(f"Data/Info/{name.lower()}_info.csv", index=False)

In [58]:
# Build and write the summary CSV for every asset class in turn.
for asset_class in ALL_STRING:
    # Carriage return keeps the label on the line the progress bar overwrites.
    print(asset_class, end="\r")
    save(asset_class)

Indian_Bonds

Saving Info: 100%|██████████| 9/9 [00:00<00:00, 68.65it/s]


US_Bonds

Saving Info: 100%|██████████| 4/4 [00:00<00:00, 35.14it/s]


Commodities

Saving Info: 100%|██████████| 14/14 [00:00<00:00, 78.47it/s]


Crypto

Saving Info: 100%|██████████| 20/20 [00:00<00:00, 124.83it/s]


Currencies

Saving Info: 100%|██████████| 26/26 [00:00<00:00, 63.09it/s]


NASDAQ200

Saving Info: 100%|██████████| 200/200 [00:04<00:00, 47.75it/s]


NIFTY200

Saving Info: 100%|██████████| 200/200 [00:01<00:00, 110.62it/s]


Indices

Saving Info: 100%|██████████| 24/24 [00:00<00:00, 61.96it/s]


In [60]:
# Reload the NIFTY200 summary written by save() and check the non-null counts
# to spot files that produced empty rows.
nifty200 = pd.read_csv(filepath_or_buffer="Data/Info/nifty200_info.csv")
nifty200.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Ticker      166 non-null    object 
 1   Length      166 non-null    float64
 2   Start Date  166 non-null    object 
 3   End Date    166 non-null    object 
 4   Highest     166 non-null    float64
 5   Lowest      166 non-null    float64
 6   Last Price  166 non-null    float64
dtypes: float64(4), object(3)
memory usage: 11.1+ KB


In [63]:
# Delete NIFTY200 price files that hold at most one data row.
# WARNING: this removes files from disk and cannot be undone.
# The cell is safe to re-run: paths already deleted are skipped, and NIFTY200
# is rebound to the surviving files so later cells do not hit missing paths.
_nifty200_kept = []
for csv_path in NIFTY200:
    if not os.path.exists(csv_path):
        # Already removed by an earlier run of this cell.
        continue
    # Two rows are enough to decide; no need to parse the whole file.
    if len(pd.read_csv(csv_path, nrows=2)) <= 1:
        os.remove(csv_path)
    else:
        _nifty200_kept.append(csv_path)
NIFTY200 = _nifty200_kept

In [65]:
# Discard summary rows with any missing value (the files that could not be
# summarised), modifying the frame in place, then re-check the counts.
nifty200.dropna(axis=0, how="any", inplace=True)
nifty200.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 166 entries, 0 to 197
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Ticker      166 non-null    object 
 1   Length      166 non-null    float64
 2   Start Date  166 non-null    object 
 3   End Date    166 non-null    object 
 4   Highest     166 non-null    float64
 5   Lowest      166 non-null    float64
 6   Last Price  166 non-null    float64
dtypes: float64(4), object(3)
memory usage: 10.4+ KB


In [66]:
# Overwrite the NIFTY200 summary file with the cleaned table (no index column).
nifty200.to_csv(path_or_buf="Data/Info/nifty200_info.csv", index=False)