In [6]:
import pyodbc
from concurrent.futures import ThreadPoolExecutor
import requests
import time
from decimal import Decimal  # Import the Decimal class

# --- Database connection and initial query ---------------------------------
# NOTE(review): the credentials below are hard-coded in the notebook; consider
# loading them from the environment or a secrets store instead.
server = 'tcp:lds.di.unipi.it'
database = 'Group_ID_200_DB'
username = 'Group_ID_200'
password = '89VIG10K'

# ODBC connection string for SQL Server. The DRIVER part is a plain literal so
# its curly braces are kept verbatim; only the second literal is an f-string.
connectionString = (
    'DRIVER={ODBC Driver 17 for SQL Server};'
    f'SERVER={server};DATABASE={database};UID={username};PWD={password}'
)

# Open the connection and a cursor that the rest of the notebook reuses.
conn = pyodbc.connect(connectionString)
cursor = conn.cursor()

# Select the coordinates of every Geography row still missing a city or state.
query = "SELECT latitude, longitude FROM Geography WHERE city IS NULL OR state IS NULL"

# pyodbc's execute() returns the cursor itself, so the fetch can be chained.
records = cursor.execute(query).fetchall()
# For debugging on a subset: records = records[20:100]
print(len(records))


53


In [None]:
# Function to insert data using bulk insert
def bulk_insert_data(conn, cursor, table_name, data):
    """Insert many rows into ``table_name`` with one ``executemany`` call.

    Args:
        conn: Connection object; ``commit()`` is called after the insert.
        cursor: Cursor object providing ``executemany(query, values)``.
        table_name: Target table name. It is interpolated into the SQL text
            (identifiers cannot be bound as parameters), so it must come
            from trusted code, never from user input.
        data: Sequence of dicts mapping column name to value. The keys of
            the first row define the column list and its order.

    Raises:
        KeyError: If a row is missing one of the first row's columns.
    """
    # Nothing to insert: avoid indexing data[0] on an empty batch.
    if not data:
        return

    column_names = list(data[0])
    columns = ', '.join(column_names)
    placeholders = ', '.join(['?'] * len(column_names))

    # Build each tuple in the column order of the first row, so a row whose
    # dict has a different key order still lands in the right columns.
    values = [tuple(row[name] for name in column_names) for row in data]

    # Build the bulk insert query
    bulk_insert_query = f'INSERT INTO {table_name} ({columns}) VALUES ({placeholders});'

    # Execute the bulk insert query
    cursor.executemany(bulk_insert_query, values)
    conn.commit()

# Function to insert data into the database
def insert_data_with_ID(conn, cursor, id_dict, table_name, key_dict):
    """Insert a row that carries its own ID, unless it was already inserted.

    The tuple of ``key_dict`` values is used as the cache key in ``id_dict``;
    the cached value is the first value of ``key_dict`` (callers in this file
    put the ID column first).

    Args:
        conn: Connection object; ``commit()`` is called after the insert.
        cursor: Cursor object providing ``execute(query, params)``.
        id_dict: Dict used as a cache of rows already inserted; updated in
            place only after the insert has been committed.
        table_name: Target table name, interpolated into the SQL text, so it
            must come from trusted code.
        key_dict: Dict mapping column name to value for the row.

    Raises:
        ValueError: If ``key_dict`` is empty.
    """
    values = list(key_dict.values())
    if not values:
        raise ValueError('key_dict must contain at least one column')

    key_tuple = tuple(values)
    if key_tuple in id_dict:
        return

    columns = ', '.join(key_dict.keys())
    placeholders = ', '.join(['?'] * len(values))
    insert_query = f'INSERT INTO {table_name} ({columns}) VALUES ({placeholders});'

    cursor.execute(insert_query, values)
    conn.commit()

    # Record the row as inserted only once the insert has succeeded, so a
    # failed insert is not mistaken for an existing row on a later call.
    id_dict[key_tuple] = values[0]

def insert_data_without_ID(conn, cursor, table_name, key_dict):
    """Insert a row and return the identity value generated for it.

    The INSERT and the identity lookup are sent as one batch. The previous
    ``SELECT TOP 1 * FROM <table>`` returned an arbitrary existing row rather
    than the row just inserted, so callers cached wrong IDs.

    Args:
        conn: Connection object; ``commit()`` is called after the insert.
        cursor: Cursor object providing ``execute(query, params)`` and
            ``fetchone()``.
        table_name: Target table name, interpolated into the SQL text, so it
            must come from trusted code. Assumed to have an identity column.
        key_dict: Dict mapping column name to value for the new row.

    Returns:
        The generated identity value as an ``int``.

    Raises:
        RuntimeError: If the database reports no identity value (for example
            when the table has no identity column).
    """
    columns = ', '.join(key_dict.keys())
    placeholders = ', '.join(['?'] * len(key_dict))
    # SET NOCOUNT ON suppresses the INSERT's row-count result so that the
    # first result set is the SELECT. SCOPE_IDENTITY() is evaluated in the
    # same batch as the INSERT so that it sees the value just generated.
    insert_query = (
        'SET NOCOUNT ON; '
        f'INSERT INTO {table_name} ({columns}) VALUES ({placeholders}); '
        'SELECT SCOPE_IDENTITY();'
    )

    # Execute the INSERT and read back the generated ID before committing
    cursor.execute(insert_query, list(key_dict.values()))
    result_row = cursor.fetchone()
    conn.commit()

    if result_row is None or result_row[0] is None:
        raise RuntimeError(f'No identity value was generated by the insert into {table_name}')

    # The driver may hand back a Decimal for SCOPE_IDENTITY(); normalise to int.
    return int(result_row[0])

def get_or_insert_id(conn, cursor, id_dict, table_name, key_dict):
    """Return the ID for ``key_dict``, inserting the row on first sight.

    ``id_dict`` is a cache keyed by the tuple of ``key_dict`` values. On a
    miss the row is inserted through ``insert_data_without_ID`` and the ID it
    returns is stored in the cache before being returned.
    """
    cache_key = tuple(key_dict.values())

    # Cache miss: insert the row and remember the ID it was given.
    if cache_key not in id_dict:
        new_id = insert_data_without_ID(conn, cursor, table_name, key_dict)
        id_dict[cache_key] = new_id
        return new_id

    return id_dict[cache_key]

# Local import: this cell uses csv.DictReader; harmless if csv is already
# imported elsewhere in the notebook.
import csv

# Lookup indexed below by the integer date_fk of each CSV row.
# NOTE(review): parse_dates_xml is defined outside this cell; presumably it
# returns a mapping from date id to a date value -- confirm.
date_mapping = parse_dates_xml('dates.xml')

# NOTE(review): 'YourTableName' is a template placeholder that was never
# replaced; the bulk insert below will fail until a real table is named.
bulk_table_name = 'YourTableName'
bulk_batch_size = 1000

# Read and process Police.csv
# newline='' is what the csv module expects for files it reads.
with open('Police-9.csv', 'r', newline='') as csvfile:
    # DictReader consumes the header row itself to obtain the field names, so
    # no explicit skip is needed (an extra next(reader) would silently drop
    # the first data row).
    reader = csv.DictReader(csvfile)

    # List of table names in your database
    # NOTE(review): 'Custody' is deleted first; presumably it references the
    # other tables -- confirm against the schema.
    table_names = ['Custody', 'Geography', 'Gun', 'Date', 'Incident', 'Partecipant']

    # Clean the tables by deleting all records
    for table_name in table_names:
        cursor.execute(f'DELETE FROM {table_name}')
        conn.commit()

    # List to store data for bulk insert
    bulk_data = []

    for row in reader:
        # `row` is a dict keyed by column name. Values are always read with
        # row[...]; unpacking the dict would yield the column names instead.

        gun_stolen_bool = 1 if row["gun_stolen"] == 'Stolen' else 0
        gun_key_dict = {"is_stolen": gun_stolen_bool, "gun_type": row['gun_type']}
        gun_id = get_or_insert_id(conn, cursor, gun_id_dict, 'Gun', gun_key_dict)

        partecipant_key = {
            "age_group": row['participant_age_group'],
            "gender": row['participant_gender'],
            "type": row['participant_type'],
            "status": row['participant_status']
        }
        partecipant_id = get_or_insert_id(conn, cursor, partecipant_id_dict, 'Partecipant', partecipant_key)

        # Get location information from latitude and longitude
        latitude, longitude = float(row['latitude']), float(row['longitude'])
        location_info = get_location_info(latitude, longitude)
        city = location_info["city"]
        state = location_info["state"]
        country = "United States"
        continent = "North America"
        geo_key = {
            "latitude": str(latitude), "longitude": str(longitude), "city": city, "state": state, "country": country,
            "continent": continent
        }
        geo_id = get_or_insert_id(conn, cursor, geo_id_dict, 'Geography', geo_key)

        date_id = int(row['date_fk'])
        date_value = date_mapping[date_id]
        date, day, month, year, quarter, day_of_week = compute_date_data(date_value)
        date_key = {
            "date_id": date_id, "the_date": date, "the_day": day, "the_month": month, "the_year": year, "quarter": quarter,
            "day_of_week": day_of_week
        }
        insert_data_with_ID(conn, cursor, date_id_dict, "Date", date_key)

        incident_id = int(row['incident_id'])
        incident_key = {"incident_id": incident_id}
        insert_data_with_ID(conn, cursor, incident_id_dict, "Incident", incident_key)

        custody_id = row['custody_id']
        # The "incident_id" key previously carried a trailing space.
        custody_key = {
            "custody_id": custody_id, "partecipant_id": partecipant_id, "gun_id": gun_id, "geo_id": geo_id,
            "date_id": date_id, "crime_gravity": compute_crime_gravity(row), "incident_id": incident_id
        }
        insert_data_with_ID(conn, cursor, custody_id_dict, 'Custody', custody_key)

        # Append the row data to the bulk_data list
        bulk_data.append({
            'custody_id': custody_id,
            'participant_age_group': row['participant_age_group'],
            'participant_gender': row['participant_gender'],
            'participant_status': row['participant_status'],
            'participant_type': row['participant_type'],
            'latitude': latitude,
            'longitude': longitude,
            'gun_stolen': row['gun_stolen'],
            'gun_type': row['gun_type'],
            'incident_id': incident_id,
            'date_fk': row['date_fk'],
            # ... other columns ...
        })

        # Commit in batches
        if len(bulk_data) >= bulk_batch_size:
            bulk_insert_data(conn, cursor, bulk_table_name, bulk_data)
            bulk_data = []

# Commit any remaining records
if bulk_data:
    bulk_insert_data(conn, cursor, bulk_table_name, bulk_data)

# Close the database connection when done
cursor.close()
conn.close()
