In [None]:
#pip install kaggle
#import shutil
#shutil.copyfile("./kaggle.json", "C:/Users/Dude/.kaggle/kaggle.json")

In [None]:
import subprocess
from zipfile import ZipFile
import glob
import os
import sys
from datetime import datetime
from pymongo import MongoClient
import pandas as pd
import shutil

# Working directory (relative to the current directory) that receives the
# extracted archives; it is removed again at the end of the notebook.
IMPORTED_FOLDER = "Imported"
# strptime format used to parse the date strings of the CSV.
DATE_FORMAT = "%Y-%m-%d"
# Connection string handed to MongoClient.
# NOTE(review): the value carries no host part - presumably redacted; confirm
# before running.
mongo_url = "mongodb://"
# Database name and name of the collection that stores the equipment records.
mongo_db = "kiev"
mongo_collection_equipment = "equipment"
# File name of the CSV, inside the downloaded dataset, that this notebook parses.
file_to_parse = "russia_losses_equipment.csv"

def create_folder(folder_name):
    """Create the directory ``folder_name`` if it does not already exist.

    Missing parent directories are created as well, and an already existing
    directory is left untouched.

    Args:
        folder_name: Path of the directory to create.

    Raises:
        OSError: If the directory cannot be created (for example because of
            missing permissions, or because the path exists as a regular file).
    """
    # Only the "already exists" case is tolerated; genuine failures surface to
    # the caller instead of being silently swallowed.
    os.makedirs(folder_name, exist_ok=True)

def convert_str_to_datetime(date_str, date_format=None):
    """Parse a date string into a ``datetime``.

    Args:
        date_str: Text to parse, e.g. ``"2022-03-26"``.
        date_format: ``strptime`` format to use. When omitted, the
            module-level ``DATE_FORMAT`` is used.

    Returns:
        The parsed ``datetime``, or ``None`` when ``date_str`` is not a string
        or does not match the format.
    """
    if date_format is None:
        date_format = DATE_FORMAT
    try:
        return datetime.strptime(date_str, date_format)
    except (TypeError, ValueError):
        # TypeError: non-string input (e.g. None or a float NaN cell);
        # ValueError: text that does not match the format.
        # Anything else (KeyboardInterrupt, programming errors) propagates.
        return None

def generate_equipment_model(date, aircraft, helicopter, tank, apc, artillery, mrl, auto, fuel_tank, drone, naval_ship, anti_aircraft):
    """Build one equipment record as a plain dict.

    Every argument is stored unchanged under the key that matches its
    parameter name, in parameter order.
    """
    field_names = (
        "date",
        "aircraft",
        "helicopter",
        "tank",
        "apc",
        "artillery",
        "mrl",
        "auto",
        "fuel_tank",
        "drone",
        "naval_ship",
        "anti_aircraft",
    )
    field_values = (
        date,
        aircraft,
        helicopter,
        tank,
        apc,
        artillery,
        mrl,
        auto,
        fuel_tank,
        drone,
        naval_ship,
        anti_aircraft,
    )
    return dict(zip(field_names, field_values))

def connect_mongo(mongo_url, mongo_db):
    """Create a ``MongoClient`` for ``mongo_url`` and return the result of
    indexing it with the database name ``mongo_db``."""
    return MongoClient(mongo_url)[mongo_db]

def get_collection(mongo_db, collection_name):
    """Look up ``collection_name`` on ``mongo_db`` via subscript access and
    return whatever that lookup yields."""
    collection = mongo_db[collection_name]
    return collection

def insert_all_in_collection(items_to_add, collection):
    """Insert ``items_to_add`` into ``collection`` with one ``insert_many`` call.

    Nothing is sent when ``items_to_add`` is empty.
    """
    if len(items_to_add) == 0:
        return
    collection.insert_many(items_to_add)

def check_data_exists(date, collection=None):
    """Tell whether a document with the given ``date`` is already stored.

    Args:
        date: Value matched against the ``date`` field of the documents.
        collection: Collection to query. When omitted, the module-level
            ``collection_equipment`` is used.

    Returns:
        ``True`` if at least one matching document exists, ``False`` otherwise.
    """
    # Compare against None explicitly rather than relying on truthiness of the
    # collection object.
    if collection is None:
        collection = collection_equipment
    # find_one retrieves at most one document, so no partially consumed cursor
    # is left behind.
    return collection.find_one({'date': date}) is not None

def remove_existing_data(data_list):
    """Return the items of ``data_list`` whose ``'date'`` is not stored yet.

    Each item's date is checked with ``check_data_exists``. Input order is
    kept and ``data_list`` itself is not modified.
    """
    return [entry for entry in data_list if not check_data_exists(entry['date'])]

# MongoDB connection: resolve the database named by ``mongo_db`` through
# ``mongo_url``, then the collection holding the equipment records.
# ``collection_equipment`` is read as a module-level global by
# ``check_data_exists``.
mongo_db_conn = connect_mongo(mongo_url, mongo_db)
collection_equipment = get_collection(mongo_db_conn, mongo_collection_equipment)


In [None]:

# Download the dataset archive into the current directory with the Kaggle CLI.
# check=True turns a failed download into a CalledProcessError instead of
# letting the rest of the cell run against missing or stale archives.
subprocess.run(
    ["kaggle", "datasets", "download", "-d", "piterfm/2022-ukraine-russian-war"],
    check=True,
)

zip_files = glob.glob("*.zip")

create_folder(IMPORTED_FOLDER)

# Extract every archive into its own sub-folder of IMPORTED_FOLDER, then
# delete the archive.
created_folders = []
for zip_file in zip_files:
    # splitext removes only the trailing extension; str.replace would also
    # strip a ".zip" that occurs in the middle of the name.
    folder_name = os.path.splitext(zip_file)[0]
    full_path = os.path.join(IMPORTED_FOLDER, folder_name)
    create_folder(full_path)
    created_folders.append(full_path)
    with ZipFile(zip_file, 'r') as zip_obj:
        # Extract all contents to the new folder
        zip_obj.extractall(full_path)
    os.remove(zip_file)

# List the CSV files of each extracted folder. The folder is part of the glob
# pattern, so the working directory never changes and every relative path
# used below (and by later cells) stays valid.
projects_files = []
for folder_path in created_folders:
    # glob.escape protects folder names that contain glob metacharacters.
    csv_pattern = os.path.join(glob.escape(folder_path), "*.csv")
    csv_files = [os.path.basename(csv_path) for csv_path in glob.glob(csv_pattern)]

    project = {
        "path": folder_path,
        "files": csv_files
    }
    projects_files.append(project)

# Locate the first extracted folder that contains the CSV to parse.
file_to_parse_path = None
for project in projects_files:
    if file_to_parse in project["files"]:
        file_to_parse_path = os.path.join(project["path"], file_to_parse)
        break

if file_to_parse_path is None:
    sys.exit("File to parse not found.")

equipment_df = pd.read_csv(file_to_parse_path)

In [None]:
# Drop the two columns that have no counterpart in the stored model and turn
# every remaining CSV row into a plain dict.
equipment_csv_dropped = equipment_df.drop(['special equipment', 'day'], axis=1)
dict_equipment = equipment_csv_dropped.to_dict(orient='records')

# Map each CSV row onto the document layout (CSV column -> model field).
new_dict_equipment = []
for record in dict_equipment:
    # The helpers are named convert_str_to_datetime / generate_equipment_model;
    # the previously used spellings were undefined and raised NameError.
    new_date = convert_str_to_datetime(record['date'])
    new_equip_model = generate_equipment_model(
        new_date,
        record['aircraft'],
        record['helicopter'],
        record['tank'],
        record['APC'],
        record['field artillery'],
        record['MRL'],
        record['military auto'],
        record['fuel tank'],
        record['drone'],
        record['naval ship'],
        record['anti-aircraft warfare']
    )
    new_dict_equipment.append(new_equip_model)

# Insert only the records whose date is not stored yet, so that re-running the
# notebook does not duplicate documents.
new_dict_equipment_removed_db = remove_existing_data(new_dict_equipment)
# Report the count rather than dumping every record into the cell output.
print(f"Inserting {len(new_dict_equipment_removed_db)} new record(s)")
insert_all_in_collection(new_dict_equipment_removed_db, collection_equipment)

# Report the extracted CSV files that this notebook does not process.
for project in projects_files:
    for file in project["files"]:
        if file != file_to_parse:
            print(f"This file was not parsed: {project['path']}/{file}")

print("Deleting the main root downloaded folder")
shutil.rmtree(IMPORTED_FOLDER)

inserting [{'date': datetime.datetime(2022, 3, 26, 0, 0), 'aircraft': 117, 'helicopter': 127, 'tank': 575, 'apc': 1640, 'artillery': 293, 'mrl': 91, 'auto': 1131, 'fuel_tank': 73, 'drone': 56, 'naval_ship': 7, 'anti_aircraft': 51}, {'date': datetime.datetime(2022, 3, 27, 0, 0), 'aircraft': 121, 'helicopter': 127, 'tank': 582, 'apc': 1664, 'artillery': 294, 'mrl': 93, 'auto': 1144, 'fuel_tank': 73, 'drone': 56, 'naval_ship': 7, 'anti_aircraft': 52}, {'date': datetime.datetime(2022, 3, 28, 0, 0), 'aircraft': 123, 'helicopter': 127, 'tank': 586, 'apc': 1694, 'artillery': 302, 'mrl': 95, 'auto': 1150, 'fuel_tank': 73, 'drone': 66, 'naval_ship': 7, 'anti_aircraft': 54}, {'date': datetime.datetime(2022, 3, 29, 0, 0), 'aircraft': 127, 'helicopter': 129, 'tank': 597, 'apc': 1710, 'artillery': 303, 'mrl': 96, 'auto': 1178, 'fuel_tank': 73, 'drone': 71, 'naval_ship': 7, 'anti_aircraft': 54}, {'date': datetime.datetime(2022, 3, 30, 0, 0), 'aircraft': 131, 'helicopter': 131, 'tank': 605, 'apc': 17