<a href="https://www.kaggle.com/code/ayushkhaire/real-time-data-update?scriptVersionId=217860312" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import requests as rq
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime
import time 
from tqdm import tqdm
import os
from datetime import datetime,timedelta
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
import logging
from kaggle_secrets import UserSecretsClient
import warnings
import json
import shutil
import subprocess
import gc

warnings.filterwarnings('ignore')

# Configuration

In [2]:
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)


logger.info("This is an INFO message")

# Secrets

In [3]:
user_secrets = UserSecretsClient()
kaggle_apikey = user_secrets.get_secret("kaggle_apikey")
kaggle_username = user_secrets.get_secret("kaggle_username")
mngodb_database_name = user_secrets.get_secret("mngodb_database_name")
mongodb_app_name = user_secrets.get_secret("mongodb_appname")
mongodb_password = user_secrets.get_secret("mongodb_password")
mongodb_username = user_secrets.get_secret("mongodb_username")
mongodb_cluster_name = user_secrets.get_secret("mongodb_cluster_name")

# MongoDB

In [4]:
class AtlasClient:
    def __init__(self, atlas_uri, dbname):
        self.mongodb_client = MongoClient(atlas_uri)
        self.database = self.mongodb_client[dbname]

    def ping(self):
        try:
            self.mongodb_client.admin.command('ping')
            logging.info("Pinged your MongoDB deployment. Connection successful.")
        except Exception as e:
            logging.error(f"Failed to connect to MongoDB: {e}")

    def get_collection(self, collection_name):
        collection = self.database[collection_name]
        return collection

    def findOneByKey(self,collection_name,key):
        collection = self.get_collection(collection_name)
        result = collection.find_one({ key: { "$exists": True } })
        return result


    def find(self, collection_name, filter={}, limit=0):
        collection = self.database[collection_name]
        items = list(collection.find(filter=filter, limit=limit))
        return items
    
    def insert(self, collection_name, documents):
        """
        Inserts one or more documents into a MongoDB collection.
        
        Parameters:
        - collection_name: str, the name of the collection
        - documents: dict or list of dicts, the document(s) to insert
        
        If `documents` is a list, it will insert multiple documents using `insert_many`.
        Otherwise, it will insert a single document using `insert_one`.
        """
        collection = self.get_collection(collection_name)
        
        if isinstance(documents, list):
            result = collection.insert_many(documents)
            return result.inserted_ids
        else:
            result = collection.insert_one(documents)
            return result.inserted_id
        
    def delete(self, collection_name, filter={}, _del_all_=False):
        """
        Deletes documents from a MongoDB collection based on the filter.
        
        Parameters:
        - collection_name: str, the name of the collection.
        - filter: dict, the filter to find documents to delete (default is {}).
        - _del_all_: bool, if True, deletes all documents matching the filter using `delete_many()`.
                      If False, deletes only one document using `delete_one()`.
        
        Returns:
        - Number of documents deleted.
        """
        collection = self.get_collection(collection_name)
        
        if _del_all_:
            result = collection.delete_many(filter)
            return result.deleted_count
        else:
            result = collection.delete_one(filter)
            if result.deleted_count == 1:
                pass
            else:
                pass
            return result.deleted_count

# Stocks manager

In [5]:
AC = AtlasClient(
    atlas_uri=f"mongodb+srv://{mongodb_username}:{mongodb_password}@{mongodb_cluster_name}.fznbh.mongodb.net/?retryWrites=true&w=majority&appName={mongodb_app_name}",
    dbname = mngodb_database_name
)


class stocksManager:
    def __init__(self) -> None:
        self.available_stocks = []
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
        }
        self.headers = headers
        self.firstrun = 0

    def collect_stock_symbols(self):
        targets = [
            '52-week-gainers', 
            '52-week-losers'
        ]   
    
        limitlist = []

        for page in tqdm(targets):
            url = f'https://finance.yahoo.com/markets/stocks/{page}/?start=0&count=100'
            # print(url)
            try:
                r = rq.get(url,headers = self.headers)
            except Exception as e:
                logger.warning("cannot hit url : ",url ,e,r.status_code)
            soup = BeautifulSoup(r.text,'html.parser')
            limits = soup.find(
                'div',{'class':'total'}
            ).text
            limits = limits.split(' ')[2]
            limitlist.append(limits)

        max_hits = []
        for limit in limitlist:
            max_hit = int(int(limit) / 100)
            max_hits.append(max_hit)

        findict = {
            'targets':targets,
            'max_hits':max_hits
        }
        
        urls_for_stocks = []

        i = 0
        for i in range(
            len(
                findict['targets']
                )
            ):
            target = findict['targets'][i]
            maxhit = findict['max_hits'][i]
            for m in range(maxhit+1):
                url = f'https://finance.yahoo.com/markets/stocks/{target}/?start={m*100}&count=100/'
                urls_for_stocks.append(url)

        data = []

        logger.info('collecting data for symbols _______________________________--')
        for u in urls_for_stocks:
            catg = u.split('/')[-3]
            symbol_list = []
            try:
                r = rq.get(u,headers = self.headers)
            except Exception as e:
                logger.warning("cannot hit url : ",u ,r.status_code)
            soup = BeautifulSoup(r.text,'html.parser')
            symbs= soup.find_all('span',{'class':'symbol'})
            for s in symbs:
                symbol_list.append(s.text)
            data.append(
                {catg:symbol_list}
            )
        logger.info("finished collecting data for symbols ______________________________-")
        data = {'names':data}
        return data
    
    def return_list_for_symbols(self):
        symbols = self.collect_stock_symbols()
        finals_symbols = []
        for n in symbols['names']:
            for key in n.keys():
                finals_symbols=finals_symbols+n[key]
        finals_symbols = list(set(finals_symbols))
        return finals_symbols

    def return_human_timestamp(self, timestamps):
            if isinstance(timestamps, list):
                new_dates = []
                for unix_time in timestamps:
                    try:
                        if isinstance(unix_time, str):
                            datetime.strptime(unix_time, '%Y-%m-%d %H:%M:%S') 
                            new_dates.append(unix_time)
                        else:
                            unix_time = float(unix_time)
                            date = datetime.fromtimestamp(unix_time).strftime('%Y-%m-%d %H:%M:%S')
                            new_dates.append(date)
                    except (ValueError, TypeError):
                        new_dates.append(None)  
                return new_dates
            elif isinstance(timestamps, str):
                try:
                    unix_time = float(timestamps)
                    date = datetime.fromtimestamp(unix_time).strftime('%Y-%m-%d %H:%M:%S')
                    return date
                except (ValueError, TypeError):
                    return None

    def return_unix_timestamps(self, date_strings):
        if isinstance(date_strings, list):
            unix_timestamps = []
            for date_str in date_strings:
                try:
                    dt = datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S')
                    unix_timestamp = int(dt.timestamp())  
                    unix_timestamps.append(unix_timestamp)
                except (ValueError, TypeError):
                    unix_timestamps.append(None)
            return unix_timestamps
        elif isinstance(date_strings, str):
            try:
                dt = datetime.strptime(date_strings, '%Y-%m-%d %H:%M:%S')
                unix_timestamp = int(dt.timestamp())  
                return unix_timestamp
            except (ValueError, TypeError):
                return None

    def update_prices_for_daily(self, symbol_list):
        current_timestamp = int(time.time())
        current_time = datetime.fromtimestamp(current_timestamp)
        human_readable_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
        
        # Start and end periods for data retrieval
        start_date_str = "2015-01-01"
        start_date_obj = datetime.strptime(start_date_str, "%Y-%m-%d")
        period1 = int(time.mktime(start_date_obj.timetuple()))
        period2 = current_timestamp
        
        logger.warning(f"Daily data for today's date {human_readable_time}")
        logger.info(f"Checking updates for period1={period1} & period2={period2} for stocks daily")

        # Define base path for daily updates
        files_path = f'/kaggle/working/daily_update/'
        os.makedirs('/kaggle/working/daily_update/',exist_ok = True)
        os.makedirs('/kaggle/working/daily_update_to_kaggle/',exist_ok = True)
        AC.delete(
            "daily_data",
            _del_all_ = True
        )
        for stock in tqdm(symbol_list):
            stock_symbol = stock.replace(' ', '')
            json_path = f'{files_path}/{stock_symbol}.json'
            os.makedirs(os.path.dirname(json_path), exist_ok=True)
            
            url = (f'https://query1.finance.yahoo.com/v8/finance/chart/{stock_symbol}?events=capitalGain%7Cdiv%7Csplit'
                f'&formatted=true&includeAdjustedClose=true&interval=1d&period1={period1}&period2={period2}'
                f'&symbol={stock_symbol}&userYfid=true&lang=en-US&region=US')
            try:
                response = rq.get(url, headers=self.headers)
                if response.status_code == 200:
                    with open(json_path, 'wb') as file:
                        file.write(response.content)
                    json_data = pd.read_json(json_path)
                    timestamp = json_data['chart']['result'][0].get('timestamp')
                    if timestamp:
                        new_timestamps = self.return_human_timestamp(timestamp)
                        new_data = json_data['chart']['result'][0]['indicators']['quote'][0]
                        new_data['timestamp'] = new_timestamps
                        data_to_insert = {f'{stock_symbol}':new_data}
                        if data_to_insert:
#                             in database
                            AC.insert(
                                collection_name="daily_data",
                                documents=data_to_insert
                            )
#                            local
                            new_data = pd.DataFrame(new_data)
                            new_data.to_csv(f'/kaggle/working/daily_update_to_kaggle/{stock}.csv')
                            
                        else:
                            logger.error(f'daily data insertion for {stock} failed .',e)
                else:
                    logger.warning(f"Request failed: {url}, Status code: {response.status_code}")
                    continue
            except:
                continue
        logger.info("Daily data update finished.")
  
    
    def update_prices_for_per_minute(self, symbol_list,last_date):
        os.makedirs(f'/kaggle/working/per_minute/', exist_ok=True)
        os.makedirs(f'/kaggle/working/per_minute_to_kaggle/', exist_ok=True) 
        date_time_obj = datetime.strptime(last_date, '%Y-%m-%d %H:%M:%S')
        period1 = int(date_time_obj.timestamp())
        seven_days_back = date_time_obj - timedelta(days=7)
        period2 = int(seven_days_back.timestamp())
  
        logger.info(f"Checking updates for period1={period1} & period2={period2} for stocks per minute.")
            
        AC.delete(
                collection_name="per_minute_data",
                _del_all_ = True
        )
        if symbol_list:
            for stock in tqdm(symbol_list):
                try:
                    stock_symbol = stock.replace(' ', '')
                    link = f'https://query2.finance.yahoo.com/v8/finance/chart/{stock_symbol}?period1={period2}&period2={period1}&interval=1m&includePrePost=true&events=div%7Csplit%7Cearn&&lang=en-US&region=US'
                    response = rq.get(link, headers=self.headers)
                    tmppath = f'/kaggle/working/per_minute/{stock_symbol}.json'
                    if response.status_code == 200:
                        with open(tmppath, 'wb') as jsn:
                            jsn.write(response.content)
                        json_data  = pd.read_json(tmppath)
                        timestamp = json_data['chart'][0][0]['timestamp']
                        json_data = json_data['chart'][0][0]["indicators"]["quote"][0]
                        try:
                            new_timestamps = self.return_human_timestamp(timestamp)
                            json_data['timestamp'] = new_timestamps
                            data_to_insert = {f'{stock_symbol}':json_data}
    #                         to database
                            if data_to_insert:
                                # AC.insert(
                                #     collection_name="per_minute_data",
                                #     documents=data_to_insert
                                # )
    #                           to csv
                                json_data = pd.DataFrame(json_data)
                                json_data.to_csv(f'/kaggle/working/per_minute_to_kaggle/{stock}.csv')
                            else:
                                logger.warning(f'per minute data insertion data insertion for {stock} failed .'),e

                        except Exception as e:
                            logger.warning(f"Request failed: {link}, Status code: {response.status_code}")
                            print('failed',e)
                            continue
                except:
                    continue
                
        else:
            logger.warning("It is not Sunday today. Skipping the update step.")
        logger.info("Per minute update finished.")

    def update_stocks_list_for_today(self):
        AC.delete('master',_del_all_ = True)
        stocks = AC.find("daily_data")
        stockslist = []
        for st in tqdm(stocks):
            stockslist.append(list(st.keys())[1])
        self.available_stocks = stockslist
        AC.insert("master",{'stocks':stockslist})
        logger.warning("stocks list updated !")
        
    # specific to kaggle
    def Kaggle_process_daily_data(self,symbol_list):
        self.update_prices_for_daily(symbol_list)
        megadatadailyframe = pd.DataFrame()
        daily_files_csv = os.listdir('/kaggle/working/daily_update_to_kaggle')

        for csv in tqdm(daily_files_csv):
            df = pd.read_csv(f'/kaggle/working/daily_update_to_kaggle/{csv}')
            df['stockname'] = csv.split('.')[0]
            megadatadailyframe = pd.concat([megadatadailyframe,df],axis = 0)
            
        os.makedirs(f'/kaggle/working/daily_update_to_kaggle_final',exist_ok = True)
        megadatadailyframe.to_csv('/kaggle/working/daily_update_to_kaggle_final/stocks.csv')
    
    def Kaggle_process_per_minute_data(self,symbol_list,last_date,weekday):
        self.update_prices_for_per_minute(symbol_list,last_date)
        megadataperminuteframe = pd.DataFrame()
        perminute_files_csv = os.listdir('/kaggle/working/per_minute_to_kaggle/')

        for csv in tqdm(perminute_files_csv):
            df = pd.read_csv(f'/kaggle/working/per_minute_to_kaggle/{csv}')
            df['stockname'] = csv.split('.')[0]
            megadataperminuteframe = pd.concat([megadataperminuteframe,df],axis = 0)
        os.makedirs(f'/kaggle/working/per_minute_to_kaggle_final',exist_ok = True)
        past_df = pd.read_csv("/kaggle/input/real-time-stocks-data/stocks.csv")
        megadataperminuteframe = pd.concat([megadataperminuteframe,past_df],axis = 0)
        megadataperminuteframe = megadataperminuteframe.drop_duplicates()
        megadataperminuteframe = megadataperminuteframe[['low','high','volume','open','close','stockname','timestamp']]
        megadataperminuteframe.to_csv('/kaggle/working/per_minute_to_kaggle_final/stocks.csv')

# Recover data if any lost

In [6]:
class recoverData:
    def __init__(self):
        self.symbol_list = None
        self.avdata = None
        self.headers = {'User-Agent': 'Mozilla/5.0'}
        self.batches_to_check = []
        self.recoverlist = []

    def setup(self):
        os.makedirs("/kaggle/working/per_minute_recover/json/", exist_ok=True)
        os.makedirs("/kaggle/working/per_minute_recover/csv/", exist_ok=True)
        os.makedirs("/kaggle/working/per_minute_recover/final/", exist_ok=True)
        print("set up directories")
        self.avdata = pd.read_csv('/kaggle/input/real-time-stocks-data/stocks.csv')
        self.symbol_list = self.avdata['stockname'].unique()
        print("stock list and dataset loaded .")

    def return_unix_timestamps(self, date_strings):
        try:
            dt = datetime.strptime(date_strings, '%Y-%m-%d')
            return int(dt.timestamp())
        except ValueError:
            return None

    def return_human_timestamp(self, timestamps):
        try:
            return [datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') for ts in timestamps]
        except Exception:
            return None

    def collect_per_minute_data(self, stock_symbol, start_date, end_date):
        period1 = start_date
        period2 = end_date
        if period1 is None or period2 is None:
            print(f"Invalid dates provided: {start_date}, {end_date}")
            return

        stock_symbol = stock_symbol.replace(' ', '')
        link = f"https://query2.finance.yahoo.com/v8/finance/chart/{stock_symbol}?period1={period1}&period2={period2}&interval=1m&includePrePost=true&events=div%7Csplit%7Cearnings&lang=en-US&region=US"
        response = rq.get(link, headers=self.headers)

        if response.status_code == 200:
            tmppath = f'/kaggle/working/per_minute_recover/json/{stock_symbol}.json'
            with open(tmppath, 'wb') as jsn:
                jsn.write(response.content)
            json_data = response.json()

            try:
                timestamps = json_data['chart']['result'][0]['timestamp']
                indicators = json_data['chart']['result'][0]['indicators']['quote'][0]
                indicators['timestamp'] = self.return_human_timestamp(timestamps)

                df = pd.DataFrame(indicators)
                os.makedirs(f'/kaggle/working/per_minute_recover/csv/{stock_symbol}/',exist_ok = True)
                df.to_csv(
                    f"/kaggle/working/per_minute_recover/csv/{stock_symbol}/{start_date}_{end_date}.csv",
                    index=False,
                )
            except KeyError as e:
                print(f"Error processing JSON data: {e}")
        else:
            print(f"Failed to fetch data for {stock_symbol}. Status code: {response.status_code}")

    def setup_batches(self):
        today = datetime.now()
        while today.weekday() != 6:  # Find the latest sunday
            today -= timedelta(days=1)
        
        # Generate the four weeks from today (Monday to Friday)
        self.batches_to_check = []
        for i in range(4):  # Generate 4 weeks
            end_date = today - timedelta(weeks=i)
            start_date = end_date - timedelta(days=6)  # Monday
            self.batches_to_check.append((
                start_date.replace(hour=0, minute=0, second=0), 
                end_date.replace(hour=23, minute=59, second=59)
            ))
        print(f"Generated batches to check")
    
    def chheck_stock_for_batch(self, symbol):   
        newdf = self.avdata[self.avdata['stockname'] == symbol]
        newdf['dates'] = pd.to_datetime(newdf['timestamp']).dt.date
        available_dates = set(newdf['dates'])    
        missing_weeks = []

        for start_date, end_date in self.batches_to_check:
            # Business days: Monday to Friday
            week_dates = set(pd.date_range(start=start_date, end=end_date, freq='B').date)
            
            # Check if all the business days (Monday to Friday) are missing
            if week_dates.isdisjoint(available_dates):
                # print(f"{symbol} Week {start_date} to {end_date} is entirely missing (all weekdays missing)")
                # Convert start_date and end_date to Unix timestamps
                start_unix = int(start_date.timestamp())
                end_unix = int(end_date.timestamp())
                missing_weeks.append([symbol, start_unix, end_unix])
        return missing_weeks

    def detect_all_stocks(self):
        all_stocks = self.avdata['stockname'].unique()
        self.setup_batches()  
        for st in tqdm(all_stocks):
            missing_weeks = self.chheck_stock_for_batch(st)
            self.recoverlist = self.recoverlist + missing_weeks
        print("setup for targets completed")
        return self.recoverlist

    def download_bunches_mass(self,targets):
        print("starting scrappers")
        try:
            for item in tqdm(targets):
                self.collect_per_minute_data(item[0],item[1],item[2])
        except Exception as error:
            print(error)

    def merge_new_data(self):
        newframe = pd.DataFrame()
        print("starting mergers")
        all_csvs = os.listdir('/kaggle/working/per_minute_recover/csv/')
        for a_csv in tqdm(all_csvs):
            all_files = os.listdir(f'/kaggle/working/per_minute_recover/csv/{a_csv}')
            for a_file in all_files:
                tmpdf = pd.read_csv(f'/kaggle/working/per_minute_recover/csv/{a_csv}/{a_file}')
                tmpdf['stockname'] = a_csv.split('.')[0]
                newframe = pd.concat([newframe,tmpdf],axis = 0)
        print("finishing mergers")
        return newframe

    def final_merge(self,newcollecteddf):
        newdf = pd.concat([self.avdata,newcollecteddf])
        del self.avdata
        gc.collect()
        print("prepared new dataframe")
        newdf = newdf[['stockname','timestamp','open','high','low','close','volume']]
        newdf.to_csv('/kaggle/working/per_minute_recover/final/stocks.csv')
        print("wrote new file")
        print("finishing main merge")

    def create_metadata_to_push_recover(self):
        print('Creating metadata file for per minute data>>>>')
        data = {
            "id": "ayushkhaire/real-time-stocks-data"
        }
        metadata_file_location = '/kaggle/working/per_minute_recover/final/dataset-metadata.json' 
        with open(metadata_file_location, 'w', encoding='utf-8') as metadata_file:
            json.dump(data, metadata_file)
        print('Metadata file created for per minute data')

    def upload_recovered_to_kaggle(self):
        os.environ['KAGGLE_USERNAME'] = kaggle_username
        os.environ['KAGGLE_KEY'] = kaggle_apikey
        retries = 0
        while retries < 5:
            try:
                command = "kaggle datasets version -p '/kaggle/working/per_minute_recover/final' -m 'Update' -r zip"
                subprocess.run(command, shell=True, check=True)
                logger.info("Upload completefor per minute data")
                break
            except Exception as error:
                logger.error(f"Error from Kaggle: {error}")
                time.sleep(5)
                retries += 1  

In [7]:
# ndf = RCC.avdata[RCC.avdata['stockname'] == "PLTR"]
# ndf['d'] = ndf['timestamp'].str.split(" ").str[0]  # Extract only the date part
# print(ndf['d'].unique())  # Print unique dates for manual verification
# ndf

# Driver code

In [8]:
# make force - True when notebook fails and do not update per minute data , and give saturdday data
force = False

In [9]:
AC.delete("daily_data",_del_all_ = True)
AC.delete("per_minute_data",_del_all_ = True)

0

In [10]:
STM = stocksManager()
symbols = STM.collect_stock_symbols()
finals_symbols = []
for n in symbols['names']:
    for key in n.keys():
        finals_symbols=finals_symbols+n[key]
finals_symbols = list(set(finals_symbols))

100%|██████████| 2/2 [00:02<00:00,  1.02s/it]


In [11]:
STM.Kaggle_process_daily_data(finals_symbols)
today = datetime.now()
if today.weekday() == 0 or force == True:
    print("there is monday today")
    yesterday = today - timedelta(days=1)
    yesterdays_date = yesterday.strftime('%Y-%m-%d 00:00:00')
    STM.Kaggle_process_per_minute_data(symbol_list = finals_symbols,last_date = yesterdays_date,weekday = 0)
else:
    print("there is no monday today")
STM.update_stocks_list_for_today()

100%|██████████| 2235/2235 [15:45<00:00,  2.36it/s]
100%|██████████| 2229/2229 [06:52<00:00,  5.40it/s]


there is no monday today


100%|██████████| 2229/2229 [00:00<00:00, 1202611.73it/s]


# create metadata files

In [12]:
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
 
print('Creating metadata file for daily data>>>>')
data = {
    "id": "ayushkhaire/stock-past-one-year-data"
}
metadata_file_location = '/kaggle/working/daily_update_to_kaggle_final/dataset-metadata.json' 
with open(metadata_file_location, 'w', encoding='utf-8') as metadata_file:
    json.dump(data, metadata_file)
print('Metadata file created for daily data')

Creating metadata file for daily data>>>>
Metadata file created for daily data


In [13]:
if today.weekday() == 0 or force == True:
    print('Creating metadata file for per minute data>>>>')
    data = {
        "id": "ayushkhaire/real-time-stocks-data"
    }
    metadata_file_location = '/kaggle/working/per_minute_to_kaggle_final/dataset-metadata.json' 
    with open(metadata_file_location, 'w', encoding='utf-8') as metadata_file:
        json.dump(data, metadata_file)
    print('Metadata file created for per minute data')
else:
    print("there is no monday today")

there is no monday today


# upload

In [14]:
os.environ['KAGGLE_USERNAME'] = kaggle_username
os.environ['KAGGLE_KEY'] = kaggle_apikey

In [15]:
retries = 0
while retries < 5:
    try:
        command = "kaggle datasets version -p '/kaggle/working/daily_update_to_kaggle_final' -m 'Update' -r zip"
        subprocess.run(command, shell=True, check=True)
        logger.info("Upload completefor daily data")
        break
    except Exception as error:
        logger.error(f"Error from Kaggle: {error}")
        time.sleep(5)
        retries += 1

Starting upload for file stocks.csv


100%|██████████| 531M/531M [00:06<00:00, 89.0MB/s]


Upload successful: stocks.csv (531MB)
Dataset version is being created. Please check progress at https://www.kaggle.com/ayushkhaire/stock-past-one-year-data


In [16]:
if today.weekday() == 0 or force == True:
    print("there is monday today")    
    retries = 0
    while retries < 5:
        try:
            command = "kaggle datasets version -p '/kaggle/working/per_minute_to_kaggle_final' -m 'Update' -r zip"
            subprocess.run(command, shell=True, check=True)
            logger.info("Upload completefor per minute data")
            break
        except Exception as error:
            logger.error(f"Error from Kaggle: {error}")
            time.sleep(5)
            retries += 1
else:
    print("there is no saturday today")    

there is no saturday today


# Recover lost one

In [17]:
RCC = recoverData()
RCC.setup()
targets = RCC.detect_all_stocks()
targets = targets + STM.available_stocks
RCC.download_bunches_mass(targets)
newcollecteddf = RCC.merge_new_data()
RCC.final_merge(newcollecteddf)
RCC.create_metadata_to_push_recover()
RCC.upload_recovered_to_kaggle()

set up directories
stock list and dataset loaded .
Generated batches to check


100%|██████████| 606/606 [04:17<00:00,  2.35it/s]


setup for targets completed
starting scrappers


  0%|          | 4/3214 [00:00<09:20,  5.72it/s]

Failed to fetch data for PLTR. Status code: 422


  0%|          | 7/3214 [00:01<08:16,  6.45it/s]

Failed to fetch data for MSTR. Status code: 422


  0%|          | 8/3214 [00:01<08:11,  6.52it/s]

Failed to fetch data for ROKU. Status code: 422


  0%|          | 15/3214 [00:02<07:24,  7.20it/s]

Failed to fetch data for NCNO. Status code: 422


  1%|          | 18/3214 [00:02<07:06,  7.49it/s]

Failed to fetch data for DOCU. Status code: 422


  1%|          | 25/3214 [00:03<07:35,  7.00it/s]

Failed to fetch data for NATL. Status code: 422


  1%|          | 26/3214 [00:04<07:28,  7.11it/s]

Failed to fetch data for LGND. Status code: 422


  1%|          | 31/3214 [00:04<07:13,  7.34it/s]

Failed to fetch data for HOOD. Status code: 422


  1%|          | 36/3214 [00:05<07:34,  7.00it/s]

Failed to fetch data for CRWD. Status code: 422


  1%|          | 39/3214 [00:05<06:53,  7.68it/s]

Failed to fetch data for INGM. Status code: 422


  1%|▏         | 42/3214 [00:06<06:50,  7.73it/s]

Failed to fetch data for CRS. Status code: 422


  1%|▏         | 43/3214 [00:06<07:12,  7.34it/s]

Failed to fetch data for APPN. Status code: 422


  2%|▏         | 50/3214 [00:07<06:48,  7.75it/s]

Failed to fetch data for IPGP. Status code: 422


  2%|▏         | 53/3214 [00:07<06:49,  7.71it/s]

Failed to fetch data for IAG. Status code: 422


  2%|▏         | 56/3214 [00:08<06:50,  7.68it/s]

Failed to fetch data for CFLT. Status code: 422


  2%|▏         | 59/3214 [00:08<07:05,  7.42it/s]

Failed to fetch data for CRDO. Status code: 422


  2%|▏         | 62/3214 [00:09<07:05,  7.40it/s]

Failed to fetch data for WULF. Status code: 422


  2%|▏         | 63/3214 [00:09<07:31,  6.98it/s]

Failed to fetch data for CRSP. Status code: 422


  2%|▏         | 70/3214 [00:10<07:48,  6.71it/s]

Failed to fetch data for DFH. Status code: 422


  2%|▏         | 75/3214 [00:11<07:22,  7.09it/s]

Failed to fetch data for CXM. Status code: 422


  2%|▏         | 78/3214 [00:11<07:15,  7.21it/s]

Failed to fetch data for ATAT. Status code: 422


  3%|▎         | 81/3214 [00:11<07:06,  7.35it/s]

Failed to fetch data for GOOGL. Status code: 422


  3%|▎         | 88/3214 [00:12<07:31,  6.92it/s]

Failed to fetch data for PSTG. Status code: 422


  3%|▎         | 91/3214 [00:13<06:51,  7.60it/s]

Failed to fetch data for CLF. Status code: 422


  3%|▎         | 94/3214 [00:13<06:56,  7.48it/s]

Failed to fetch data for IDYA. Status code: 422


  3%|▎         | 97/3214 [00:14<06:59,  7.42it/s]

Failed to fetch data for VOD. Status code: 422


  3%|▎         | 102/3214 [00:14<07:14,  7.17it/s]

Failed to fetch data for PONY. Status code: 422


  3%|▎         | 103/3214 [00:15<08:04,  6.43it/s]

Failed to fetch data for JOBY. Status code: 422


  3%|▎         | 112/3214 [00:16<09:13,  5.61it/s]

Failed to fetch data for DJT. Status code: 422


  4%|▎         | 119/3214 [00:17<07:20,  7.02it/s]

Failed to fetch data for EWTX. Status code: 422


  4%|▍         | 122/3214 [00:18<07:54,  6.52it/s]

Failed to fetch data for GH. Status code: 422


  4%|▍         | 131/3214 [00:19<07:38,  6.72it/s]

Failed to fetch data for ZETA. Status code: 422


  4%|▍         | 134/3214 [00:19<06:44,  7.62it/s]

Failed to fetch data for LYG. Status code: 422


  4%|▍         | 135/3214 [00:20<07:01,  7.31it/s]

Failed to fetch data for WIX. Status code: 422


  4%|▍         | 142/3214 [00:21<06:58,  7.35it/s]

Failed to fetch data for CSX. Status code: 422


  5%|▍         | 145/3214 [00:21<06:49,  7.49it/s]

Failed to fetch data for VYX. Status code: 422


  5%|▍         | 148/3214 [00:21<06:50,  7.47it/s]

Failed to fetch data for CLS. Status code: 422


  5%|▍         | 149/3214 [00:22<07:23,  6.92it/s]

Failed to fetch data for APP. Status code: 422


  5%|▍         | 156/3214 [00:23<07:24,  6.89it/s]

Failed to fetch data for CVE. Status code: 422


  5%|▌         | 161/3214 [00:23<06:30,  7.83it/s]

Failed to fetch data for ATS. Status code: 422


  5%|▌         | 164/3214 [00:24<07:41,  6.61it/s]

Failed to fetch data for AAL. Status code: 422


  5%|▌         | 169/3214 [00:25<06:58,  7.28it/s]

Failed to fetch data for RXRX. Status code: 422


  5%|▌         | 172/3214 [00:25<06:46,  7.49it/s]

Failed to fetch data for ADT. Status code: 422


  5%|▌         | 175/3214 [00:25<06:42,  7.55it/s]

Failed to fetch data for SNAP. Status code: 422


  6%|▌         | 178/3214 [00:26<07:02,  7.18it/s]

Failed to fetch data for NXE. Status code: 422


  6%|▌         | 185/3214 [00:27<07:29,  6.74it/s]

Failed to fetch data for KYMR. Status code: 422


  6%|▌         | 190/3214 [00:28<07:39,  6.58it/s]

Failed to fetch data for WMT. Status code: 422


  6%|▌         | 195/3214 [00:29<07:33,  6.65it/s]

Failed to fetch data for PSLV. Status code: 422


  6%|▌         | 200/3214 [00:30<09:30,  5.28it/s]

Failed to fetch data for SOUN. Status code: 422


  7%|▋         | 209/3214 [00:31<07:39,  6.53it/s]

Failed to fetch data for U. Status code: 422


  7%|▋         | 212/3214 [00:31<06:57,  7.18it/s]

Failed to fetch data for PTON. Status code: 422


  7%|▋         | 213/3214 [00:32<07:10,  6.98it/s]

Failed to fetch data for HL. Status code: 422


  7%|▋         | 220/3214 [00:33<07:25,  6.72it/s]

Failed to fetch data for FLG. Status code: 422


  7%|▋         | 223/3214 [00:33<07:02,  7.08it/s]

Failed to fetch data for PDD. Status code: 422


  7%|▋         | 228/3214 [00:34<07:35,  6.55it/s]

Failed to fetch data for COIN. Status code: 422


  7%|▋         | 231/3214 [00:34<06:54,  7.19it/s]

Failed to fetch data for TMDX. Status code: 422


  7%|▋         | 234/3214 [00:35<06:33,  7.58it/s]

Failed to fetch data for IOT. Status code: 422


  7%|▋         | 237/3214 [00:35<06:46,  7.32it/s]

Failed to fetch data for SYM. Status code: 422


  8%|▊         | 242/3214 [00:36<07:12,  6.87it/s]

Failed to fetch data for LSPD. Status code: 422


  8%|▊         | 249/3214 [00:37<07:39,  6.45it/s]

Failed to fetch data for UBER. Status code: 422


  8%|▊         | 252/3214 [00:37<06:46,  7.29it/s]

Failed to fetch data for VKTX. Status code: 422


  8%|▊         | 255/3214 [00:38<06:43,  7.33it/s]

Failed to fetch data for RNA. Status code: 422


  8%|▊         | 258/3214 [00:38<06:24,  7.69it/s]

Failed to fetch data for EW. Status code: 422


  8%|▊         | 261/3214 [00:39<06:22,  7.73it/s]

Failed to fetch data for UEC. Status code: 422


  8%|▊         | 264/3214 [00:39<06:34,  7.48it/s]

Failed to fetch data for DNLI. Status code: 422


  8%|▊         | 265/3214 [00:39<07:18,  6.72it/s]

Failed to fetch data for GTLB. Status code: 422


  8%|▊         | 272/3214 [00:40<07:06,  6.90it/s]

Failed to fetch data for EAT. Status code: 422


  9%|▊         | 275/3214 [00:41<06:36,  7.41it/s]

Failed to fetch data for ZI. Status code: 422


  9%|▊         | 278/3214 [00:41<05:52,  8.34it/s]

Failed to fetch data for M. Status code: 422


  9%|▊         | 281/3214 [00:41<06:00,  8.15it/s]

Failed to fetch data for WRD. Status code: 422


  9%|▉         | 282/3214 [00:42<06:08,  7.95it/s]

Failed to fetch data for TENB. Status code: 422


  9%|▉         | 289/3214 [00:43<06:21,  7.67it/s]

Failed to fetch data for CRNX. Status code: 422


  9%|▉         | 292/3214 [00:43<06:40,  7.30it/s]

Failed to fetch data for HBM. Status code: 422


 10%|▉         | 307/3214 [00:46<07:04,  6.85it/s]

Failed to fetch data for RNG. Status code: 422


 10%|▉         | 312/3214 [00:46<06:36,  7.32it/s]

Failed to fetch data for KVUE. Status code: 422


 10%|▉         | 315/3214 [00:47<06:14,  7.74it/s]

Failed to fetch data for BCS. Status code: 422


 10%|▉         | 318/3214 [00:47<06:06,  7.89it/s]

Failed to fetch data for MNSO. Status code: 422


 10%|▉         | 321/3214 [00:47<06:10,  7.81it/s]

Failed to fetch data for SLVM. Status code: 422


 10%|█         | 328/3214 [00:49<07:34,  6.34it/s]

Failed to fetch data for INTA. Status code: 422


 10%|█         | 333/3214 [00:49<06:54,  6.95it/s]

Failed to fetch data for RKLB. Status code: 422


 10%|█         | 336/3214 [00:50<06:40,  7.18it/s]

Failed to fetch data for IMVT. Status code: 422


 10%|█         | 337/3214 [00:50<06:45,  7.09it/s]

Failed to fetch data for NRG. Status code: 422


 11%|█         | 342/3214 [00:51<06:47,  7.05it/s]

Failed to fetch data for RGTI. Status code: 422


 11%|█         | 343/3214 [00:51<06:48,  7.04it/s]

Failed to fetch data for PCVX. Status code: 422


 11%|█         | 348/3214 [00:51<06:27,  7.39it/s]

Failed to fetch data for KO. Status code: 422


 11%|█         | 357/3214 [00:53<06:54,  6.89it/s]

Failed to fetch data for GRFS. Status code: 422


 11%|█         | 360/3214 [00:53<06:23,  7.45it/s]

Failed to fetch data for OSIS. Status code: 422


 11%|█▏        | 369/3214 [00:55<07:23,  6.41it/s]

Failed to fetch data for GRAB. Status code: 422


 12%|█▏        | 372/3214 [00:55<06:28,  7.31it/s]

Failed to fetch data for KGC. Status code: 422


 12%|█▏        | 375/3214 [00:56<06:10,  7.65it/s]

Failed to fetch data for SMTC. Status code: 422


 12%|█▏        | 378/3214 [00:56<06:34,  7.18it/s]

Failed to fetch data for REZI. Status code: 422


 12%|█▏        | 381/3214 [00:56<06:08,  7.68it/s]

Failed to fetch data for SLG. Status code: 422


 12%|█▏        | 386/3214 [00:57<05:45,  8.19it/s]

Failed to fetch data for VICR. Status code: 422


 12%|█▏        | 389/3214 [00:58<06:31,  7.22it/s]

Failed to fetch data for BTSG. Status code: 422


 12%|█▏        | 392/3214 [00:58<06:18,  7.46it/s]

Failed to fetch data for BP. Status code: 422


 12%|█▏        | 395/3214 [00:58<06:23,  7.34it/s]

Failed to fetch data for WBA. Status code: 422


 12%|█▏        | 400/3214 [00:59<06:48,  6.90it/s]

Failed to fetch data for BROS. Status code: 422


 13%|█▎        | 409/3214 [01:01<06:59,  6.69it/s]

Failed to fetch data for BB. Status code: 422


 13%|█▎        | 414/3214 [01:01<07:11,  6.49it/s]

Failed to fetch data for DOCN. Status code: 422


 13%|█▎        | 419/3214 [01:02<06:55,  6.72it/s]

Failed to fetch data for QUBT. Status code: 422


 13%|█▎        | 422/3214 [01:03<07:27,  6.24it/s]

Failed to fetch data for ARCC. Status code: 422


 13%|█▎        | 431/3214 [01:04<07:00,  6.63it/s]

Failed to fetch data for PCG. Status code: 422


 14%|█▎        | 434/3214 [01:05<06:40,  6.95it/s]

Failed to fetch data for BE. Status code: 422


 14%|█▎        | 437/3214 [01:05<05:51,  7.89it/s]

Failed to fetch data for ALAB. Status code: 422


 14%|█▎        | 438/3214 [01:05<05:49,  7.95it/s]

Failed to fetch data for EVCM. Status code: 422


 14%|█▍        | 443/3214 [01:06<06:31,  7.08it/s]

Failed to fetch data for NIO. Status code: 422


 14%|█▍        | 446/3214 [01:06<06:11,  7.46it/s]

Failed to fetch data for OSCR. Status code: 422


 14%|█▍        | 449/3214 [01:07<07:19,  6.29it/s]

Failed to fetch data for TLN. Status code: 422


 14%|█▍        | 454/3214 [01:07<06:41,  6.87it/s]

Failed to fetch data for NKE. Status code: 422


 14%|█▍        | 457/3214 [01:08<06:07,  7.51it/s]

Failed to fetch data for OUT. Status code: 422


 14%|█▍        | 458/3214 [01:08<06:16,  7.32it/s]

Failed to fetch data for SRRK. Status code: 422


 14%|█▍        | 463/3214 [01:09<06:40,  6.86it/s]

Failed to fetch data for INTC. Status code: 422


 14%|█▍        | 466/3214 [01:09<06:00,  7.63it/s]

Failed to fetch data for JD. Status code: 422


 15%|█▍        | 469/3214 [01:09<06:02,  7.58it/s]

Failed to fetch data for DJTWW. Status code: 422


 15%|█▍        | 472/3214 [01:10<06:24,  7.12it/s]

Failed to fetch data for QBTS. Status code: 422


 15%|█▍        | 477/3214 [01:11<07:14,  6.30it/s]

Failed to fetch data for PATH. Status code: 422


 15%|█▍        | 480/3214 [01:11<06:44,  6.76it/s]

Failed to fetch data for SQ. Status code: 422


 15%|█▌        | 483/3214 [01:12<06:31,  6.98it/s]

Failed to fetch data for AFRM. Status code: 422


 15%|█▌        | 486/3214 [01:12<06:26,  7.06it/s]

Failed to fetch data for MU. Status code: 422


 15%|█▌        | 491/3214 [01:13<07:03,  6.42it/s]

Failed to fetch data for CVNA. Status code: 422


 15%|█▌        | 496/3214 [01:14<06:58,  6.49it/s]

Failed to fetch data for SG. Status code: 422


 16%|█▌        | 501/3214 [01:14<06:10,  7.32it/s]

Failed to fetch data for MIR. Status code: 422


 16%|█▌        | 504/3214 [01:15<06:05,  7.41it/s]

Failed to fetch data for STRL. Status code: 422


 16%|█▌        | 507/3214 [01:15<06:04,  7.44it/s]

Failed to fetch data for DAL. Status code: 422


 16%|█▌        | 510/3214 [01:16<06:37,  6.81it/s]

Failed to fetch data for NCLH. Status code: 422


 16%|█▌        | 515/3214 [01:16<06:27,  6.96it/s]

Failed to fetch data for SNOW. Status code: 422


 16%|█▌        | 516/3214 [01:17<06:38,  6.77it/s]

Failed to fetch data for MBLY. Status code: 422


 16%|█▌        | 521/3214 [01:17<06:40,  6.73it/s]

Failed to fetch data for GME. Status code: 422


 16%|█▋        | 526/3214 [01:18<06:21,  7.04it/s]

Failed to fetch data for FROG. Status code: 422


 16%|█▋        | 529/3214 [01:19<05:56,  7.53it/s]

Failed to fetch data for GERN. Status code: 422


 17%|█▋        | 532/3214 [01:19<05:44,  7.77it/s]

Failed to fetch data for MDLZ. Status code: 422


 17%|█▋        | 535/3214 [01:19<05:42,  7.82it/s]

Failed to fetch data for PRIM. Status code: 422


 17%|█▋        | 538/3214 [01:20<05:39,  7.88it/s]

Failed to fetch data for APGE. Status code: 422


 17%|█▋        | 543/3214 [01:20<05:34,  7.98it/s]

Failed to fetch data for SGHC. Status code: 422


 17%|█▋        | 546/3214 [01:21<06:13,  7.14it/s]

Failed to fetch data for VIK. Status code: 422


 17%|█▋        | 549/3214 [01:21<05:25,  8.18it/s]

Failed to fetch data for BTDR. Status code: 422


 17%|█▋        | 552/3214 [01:21<05:26,  8.14it/s]

Failed to fetch data for LNVGY. Status code: 422


 17%|█▋        | 555/3214 [01:22<05:59,  7.40it/s]

Failed to fetch data for WFC. Status code: 422


 17%|█▋        | 562/3214 [01:23<06:57,  6.35it/s]

Failed to fetch data for NEO. Status code: 422


 18%|█▊        | 571/3214 [01:24<06:14,  7.07it/s]

Failed to fetch data for DYN. Status code: 422


 18%|█▊        | 578/3214 [01:25<06:02,  7.27it/s]

Failed to fetch data for EXTR. Status code: 422


 18%|█▊        | 583/3214 [01:26<05:17,  8.28it/s]

Failed to fetch data for OMAB. Status code: 422


 18%|█▊        | 586/3214 [01:27<05:51,  7.47it/s]

Failed to fetch data for AMED. Status code: 422


 18%|█▊        | 589/3214 [01:27<05:42,  7.66it/s]

Failed to fetch data for STLA. Status code: 422


 18%|█▊        | 592/3214 [01:27<05:33,  7.87it/s]

Failed to fetch data for COHR. Status code: 422


 19%|█▊        | 595/3214 [01:28<05:27,  8.00it/s]

Failed to fetch data for HBAN. Status code: 422


 19%|█▊        | 598/3214 [01:28<05:04,  8.60it/s]

Failed to fetch data for CGON. Status code: 422


 19%|█▊        | 601/3214 [01:28<05:02,  8.65it/s]

Failed to fetch data for UI. Status code: 422


 19%|█▉        | 604/3214 [01:29<05:19,  8.16it/s]

Failed to fetch data for SOBO. Status code: 422


 19%|█▉        | 607/3214 [01:29<05:21,  8.10it/s]

Failed to fetch data for NBIS. Status code: 422


 19%|█▉        | 610/3214 [01:30<05:28,  7.92it/s]

Failed to fetch data for ASAN. Status code: 422


 19%|█▉        | 615/3214 [01:30<05:49,  7.44it/s]

Failed to fetch data for GEV. Status code: 422


 19%|█▉        | 618/3214 [01:31<05:49,  7.44it/s]

Failed to fetch data for AVPT. Status code: 422


 19%|█▉        | 621/3214 [01:31<05:31,  7.83it/s]

Failed to fetch data for CX. Status code: 422


 19%|█▉        | 624/3214 [01:31<05:46,  7.48it/s]

Failed to fetch data for NLY. Status code: 422


 20%|█▉        | 627/3214 [01:32<05:35,  7.70it/s]

Failed to fetch data for FUTU. Status code: 422


 20%|█▉        | 630/3214 [01:32<05:28,  7.86it/s]

Failed to fetch data for VRN. Status code: 422


 20%|█▉        | 633/3214 [01:33<05:49,  7.38it/s]

Failed to fetch data for SBSW. Status code: 422


 20%|█▉        | 636/3214 [01:33<06:02,  7.10it/s]

Failed to fetch data for CART. Status code: 422


 20%|█▉        | 641/3214 [01:34<05:39,  7.57it/s]

Failed to fetch data for TCOM. Status code: 422


 20%|█▉        | 642/3214 [01:34<06:06,  7.01it/s]

Failed to fetch data for NVCR. Status code: 422


 20%|██        | 647/3214 [01:35<06:01,  7.10it/s]

Failed to fetch data for XOM. Status code: 422


 20%|██        | 652/3214 [01:36<06:52,  6.21it/s]

Failed to fetch data for SMMT. Status code: 422


 20%|██        | 657/3214 [01:36<06:27,  6.59it/s]

Failed to fetch data for VST. Status code: 422


 21%|██        | 662/3214 [01:37<06:11,  6.88it/s]

Failed to fetch data for CVX. Status code: 422


 21%|██        | 665/3214 [01:38<06:00,  7.08it/s]

Failed to fetch data for ARWR. Status code: 422


 21%|██        | 666/3214 [01:38<06:19,  6.72it/s]

Failed to fetch data for CELH. Status code: 422


 21%|██        | 673/3214 [01:39<06:22,  6.64it/s]

Failed to fetch data for BEAM. Status code: 422


 21%|██        | 676/3214 [01:39<05:59,  7.05it/s]

Failed to fetch data for CMCSA. Status code: 422


 21%|██        | 679/3214 [01:40<05:42,  7.41it/s]

Failed to fetch data for BILI. Status code: 422


 21%|██        | 680/3214 [01:40<05:44,  7.37it/s]

Failed to fetch data for BRZE. Status code: 422


 21%|██▏       | 686/3214 [01:41<06:22,  6.61it/s]

Failed to fetch data for JBLU. Status code: 422


 21%|██▏       | 690/3214 [01:41<06:17,  6.68it/s]

Failed to fetch data for RUN. Status code: 422


 22%|██▏       | 693/3214 [01:42<06:30,  6.45it/s]

Failed to fetch data for ITUB. Status code: 422


 22%|██▏       | 698/3214 [01:43<05:55,  7.07it/s]

Failed to fetch data for VRT. Status code: 422


 22%|██▏       | 701/3214 [01:43<05:35,  7.49it/s]

Failed to fetch data for INFA. Status code: 422


 22%|██▏       | 704/3214 [01:43<05:33,  7.54it/s]

Failed to fetch data for AQN. Status code: 422


 22%|██▏       | 707/3214 [01:44<05:36,  7.45it/s]

Failed to fetch data for HUT. Status code: 422


 22%|██▏       | 712/3214 [01:44<05:35,  7.47it/s]

Failed to fetch data for KEY. Status code: 422


 22%|██▏       | 719/3214 [01:46<05:57,  6.97it/s]

Failed to fetch data for ALKT. Status code: 422


 22%|██▏       | 722/3214 [01:46<05:40,  7.33it/s]

Failed to fetch data for MCHP. Status code: 422


 25%|██▌       | 811/3214 [01:59<05:32,  7.23it/s]

Failed to fetch data for RUM. Status code: 422


 25%|██▌       | 815/3214 [02:00<05:37,  7.12it/s]

Failed to fetch data for FL. Status code: 422


 25%|██▌       | 818/3214 [02:01<06:06,  6.54it/s]

Failed to fetch data for AAPL. Status code: 422


 26%|██▌       | 820/3214 [02:01<05:47,  6.88it/s]

Failed to fetch data for ABEV. Status code: 422


 26%|██▌       | 822/3214 [02:01<05:36,  7.11it/s]

Failed to fetch data for VZ. Status code: 422


 26%|██▌       | 828/3214 [02:02<05:42,  6.97it/s]

Failed to fetch data for BTG. Status code: 422


 26%|██▌       | 831/3214 [02:02<05:31,  7.20it/s]

Failed to fetch data for WBD. Status code: 422


 26%|██▌       | 834/3214 [02:03<04:58,  7.97it/s]

Failed to fetch data for MPW. Status code: 422


 26%|██▌       | 838/3214 [02:03<05:00,  7.91it/s]

Failed to fetch data for GOLD. Status code: 422


 26%|██▌       | 841/3214 [02:04<05:16,  7.49it/s]

Failed to fetch data for KMI. Status code: 422


 26%|██▋       | 844/3214 [02:04<05:49,  6.78it/s]

Failed to fetch data for AES. Status code: 422


 26%|██▋       | 846/3214 [02:04<05:12,  7.59it/s]

Failed to fetch data for BBD. Status code: 422


 26%|██▋       | 851/3214 [02:05<05:27,  7.22it/s]

Failed to fetch data for SBUX. Status code: 422


 27%|██▋       | 853/3214 [02:05<05:11,  7.58it/s]

Failed to fetch data for ASTS. Status code: 422


 27%|██▋       | 857/3214 [02:06<05:14,  7.50it/s]

Failed to fetch data for NEM. Status code: 422


 27%|██▋       | 861/3214 [02:07<05:43,  6.85it/s]

Failed to fetch data for META. Status code: 422


 27%|██▋       | 864/3214 [02:07<05:15,  7.45it/s]

Failed to fetch data for RLLCF. Status code: 422


 27%|██▋       | 866/3214 [02:07<04:25,  8.84it/s]

Failed to fetch data for CHWY. Status code: 422
Failed to fetch data for ZGN. Status code: 422


 27%|██▋       | 874/3214 [02:08<05:14,  7.44it/s]

Failed to fetch data for LYFT. Status code: 422


 27%|██▋       | 880/3214 [02:09<05:52,  6.63it/s]

Failed to fetch data for AMZN. Status code: 422


 28%|██▊       | 884/3214 [02:10<05:08,  7.55it/s]

Failed to fetch data for FUJHY. Status code: 422


 28%|██▊       | 887/3214 [02:10<04:39,  8.32it/s]

Failed to fetch data for QS. Status code: 422
Failed to fetch data for SLNO. Status code: 422
Failed to fetch data for OMCL. Status code: 422


 28%|██▊       | 891/3214 [02:11<04:40,  8.28it/s]

Failed to fetch data for OXY. Status code: 422


 28%|██▊       | 892/3214 [02:11<05:24,  7.15it/s]

Failed to fetch data for IONQ. Status code: 422


 28%|██▊       | 896/3214 [02:11<05:12,  7.43it/s]

Failed to fetch data for TSM. Status code: 422


 28%|██▊       | 899/3214 [02:12<05:15,  7.34it/s]

Failed to fetch data for DLTR. Status code: 422


 28%|██▊       | 901/3214 [02:12<04:57,  7.76it/s]

Failed to fetch data for NVO. Status code: 422


 28%|██▊       | 903/3214 [02:12<04:36,  8.35it/s]

Failed to fetch data for MESO. Status code: 422


 28%|██▊       | 906/3214 [02:13<04:56,  7.77it/s]

Failed to fetch data for AMTM. Status code: 422


 28%|██▊       | 909/3214 [02:13<05:23,  7.13it/s]

Failed to fetch data for F. Status code: 422


 28%|██▊       | 913/3214 [02:14<05:29,  6.99it/s]

Failed to fetch data for TM. Status code: 422


 29%|██▊       | 919/3214 [02:15<05:07,  7.46it/s]

Failed to fetch data for NU. Status code: 422


 29%|██▉       | 925/3214 [02:15<04:23,  8.68it/s]

Failed to fetch data for LI. Status code: 422
Failed to fetch data for ASO. Status code: 422


 29%|██▉       | 928/3214 [02:16<04:53,  7.80it/s]

Failed to fetch data for LUMN. Status code: 422


 29%|██▉       | 930/3214 [02:16<04:53,  7.78it/s]

Failed to fetch data for CVS. Status code: 422


 29%|██▉       | 933/3214 [02:16<05:08,  7.39it/s]

Failed to fetch data for T. Status code: 422


 29%|██▉       | 936/3214 [02:17<04:51,  7.81it/s]

Failed to fetch data for UMC. Status code: 422


 29%|██▉       | 938/3214 [02:17<04:47,  7.91it/s]

Failed to fetch data for CSCO. Status code: 422


 29%|██▉       | 944/3214 [02:18<05:33,  6.80it/s]

Failed to fetch data for AI. Status code: 422


 29%|██▉       | 947/3214 [02:18<05:04,  7.43it/s]

Failed to fetch data for FCX. Status code: 422


 30%|██▉       | 956/3214 [02:20<05:21,  7.02it/s]

Failed to fetch data for PLUG. Status code: 422


 30%|██▉       | 960/3214 [02:20<05:23,  6.96it/s]

Failed to fetch data for HIMS. Status code: 422


 30%|███       | 967/3214 [02:21<04:41,  7.98it/s]

Failed to fetch data for OKLO. Status code: 422
Failed to fetch data for IQ. Status code: 422


 30%|███       | 971/3214 [02:22<04:36,  8.11it/s]

Failed to fetch data for ET. Status code: 422


 30%|███       | 976/3214 [02:23<04:19,  8.63it/s]

Failed to fetch data for RIVN. Status code: 422
Failed to fetch data for MRX. Status code: 422


 30%|███       | 979/3214 [02:23<04:25,  8.43it/s]

Failed to fetch data for ERIC. Status code: 422


 31%|███       | 983/3214 [02:23<04:14,  8.76it/s]

Failed to fetch data for PARA. Status code: 422


 31%|███       | 986/3214 [02:24<04:16,  8.69it/s]

Failed to fetch data for AVGO. Status code: 422
Failed to fetch data for C. Status code: 400
Failed to fetch data for M. Status code: 400


 31%|███       | 990/3214 [02:24<03:23, 10.90it/s]

Failed to fetch data for A. Status code: 400
Failed to fetch data for O. Status code: 400
Failed to fetch data for S. Status code: 400


 31%|███       | 991/3214 [02:24<05:24,  6.85it/s]


Failed to fetch data for C. Status code: 400
string index out of range
starting mergers


100%|██████████| 299/299 [00:25<00:00, 11.82it/s]


finishing mergers
prepared new dataframe
wrote new file
finishing main merge
Creating metadata file for per minute data>>>>
Metadata file created for per minute data
Starting upload for file stocks.csv


100%|██████████| 576M/576M [00:06<00:00, 99.1MB/s]


Upload successful: stocks.csv (576MB)
Dataset version is being created. Please check progress at https://www.kaggle.com/ayushkhaire/real-time-stocks-data


In [18]:
shutil.rmtree('/kaggle/working/daily_update')
shutil.rmtree('/kaggle/working/daily_update_to_kaggle')
if today.weekday() == 0 or force == True:
    shutil.rmtree('/kaggle/working/per_minute')
    shutil.rmtree('/kaggle/working/per_minute_to_kaggle')
    shutil.rmtree('/kaggle/working/per_minute_recover')
    shutil.rmtree('/kaggle/working/per_minute_to_kaggle_final')