In [16]:
import os
import time

import pandas as pd
import psycopg2

# PostgreSQL connection settings.
# Every value can be overridden with a WEATHER_DB_* environment variable so
# that real credentials do not have to be stored in the notebook. The
# fallbacks are the local-development values this notebook has always used.
dbname = os.environ.get("WEATHER_DB_NAME", "weather")
user = os.environ.get("WEATHER_DB_USER", "root")
password = os.environ.get("WEATHER_DB_PASSWORD", "root")
host = os.environ.get("WEATHER_DB_HOST", "localhost")
# Kept as a string, matching the original value passed to psycopg2.connect.
port = os.environ.get("WEATHER_DB_PORT", "5401")

In [17]:
# Run configuration for this notebook.

# When truthy, custom_print() forwards its arguments to print().
debug_print = True

# Name of the country-code column.
country_col_name = "Country Code"

# Years 1960 through 2013 inclusive (the stop value is exclusive).
year_range = range(1960, 2014)

# CSV with the yearly land temperatures per country, relative to the notebook.
file_path = "../../dataset-Climate/GlobalYearlyLandTempByCountry.csv"

In [18]:
def custom_print(*args, **kwargs):
    """Forward the call to ``print`` only while ``debug_print`` is truthy.

    Accepts exactly the positional and keyword arguments that ``print`` does
    and always returns ``None``.
    """
    if not debug_print:
        return
    print(*args, **kwargs)

In [19]:
def check_null(amount):
    """Convert a missing scalar to ``None`` so it can be inserted as NULL.

    Args:
        amount: Scalar value, typically a cell read from the DataFrame.

    Returns:
        ``None`` when ``pd.isna`` reports ``amount`` as missing (NaN, None,
        NaT, ...); otherwise ``amount`` itself, unchanged.
    """
    # pd.isnull is an alias of pd.isna, so one check covers both.
    return None if pd.isna(amount) else amount

In [20]:
# Load the CSV located at `file_path` into a DataFrame.
yearly_temp_df = pd.read_csv(file_path)
# Preview the first row to check which columns were parsed.
yearly_temp_df.head(1)

Unnamed: 0,Year,AverageTemperature,MinimumTemperature,MaximumTemperature,Country
0,1750,13.098,3.6,23.893,Albania


In [21]:
# Build a country name -> country code lookup from the `country` table.
query = """
    SELECT * FROM country;
"""
connection = psycopg2.connect(
    dbname=dbname,
    user=user,
    password=password,
    host=host,
    port=port
)
try:
    # `with connection` only ends the transaction (commit, or rollback on
    # error); it does NOT close the connection. The cursor context manager
    # closes the cursor when the block exits.
    with connection, connection.cursor() as cursor:
        cursor.execute(query)
        all_country = cursor.fetchall()
finally:
    # Close explicitly so the connection is not leaked.
    connection.close()

# Each row is unpacked as (code, name); this relies on the table having
# exactly those two columns in that order.
country_dict = {name: code for code, name in all_country}

In [22]:
# Build a state name -> {"id", "country_name"} lookup from the `state` table.
query = """
    SELECT * FROM state;
"""
connection = psycopg2.connect(
    dbname=dbname,
    user=user,
    password=password,
    host=host,
    port=port
)
try:
    # `with connection` only ends the transaction (commit, or rollback on
    # error); it does NOT close the connection. The cursor context manager
    # closes the cursor when the block exits.
    with connection, connection.cursor() as cursor:
        cursor.execute(query)
        all_state = cursor.fetchall()
finally:
    # Close explicitly so the connection is not leaked.
    connection.close()

# Each row is unpacked as three values in table column order. The loop
# variable is named `state_id` rather than `id` to avoid shadowing the builtin.
state_dict = {
    state_name: {"id": state_id, "country_name": country_name}
    for state_id, state_name, country_name in all_state
}

In [23]:
def get_state_id(state_name):
    """Return the ``"id"`` entry stored for ``state_name`` in ``state_dict``.

    Raises:
        KeyError: if ``state_name`` is not a key of ``state_dict``.
    """
    state_entry = state_dict[state_name]
    return state_entry["id"]

In [24]:
# Display the country name -> country code mapping built from the `country` table.
country_dict

{'Antarctica': 'ATC',
 'Argentina': 'ARG',
 'Cambodia': 'KHM',
 'American Samoa': 'ASM',
 'Mexico': 'MEX',
 'Zambia': 'ZMB',
 'Belize': 'BLZ',
 'Bulgaria': 'BGR',
 'Isle Of Man': 'IMN',
 'Niger': 'NER',
 'Saudi Arabia': 'SAU',
 'Botswana': 'BWA',
 'Tajikistan': 'TJK',
 'Puerto Rico': 'PRI',
 'Equatorial Guinea': 'GNQ',
 'Solomon Islands': 'SLB',
 'Iceland': 'ISL',
 'Mauritania': 'MRT',
 'Burkina Faso': 'BFA',
 'Ireland': 'IRL',
 'Federated States Of Micronesia': 'FSM',
 'Kenya': 'KEN',
 'Canada': 'CAN',
 'Sweden': 'SWE',
 'Croatia': 'HRV',
 'Dominica': 'DMA',
 'Austria': 'AUT',
 'Hungary': 'HUN',
 'Netherlands': 'NLD',
 'Japan': 'JPN',
 'Guatemala': 'GTM',
 'Comoros': 'COM',
 'Slovakia': 'SVK',
 'Gaza Strip': 'PSE',
 'World': 'WLD',
 'Congo (Democratic Republic Of The)': 'COD',
 'Nepal': 'NPL',
 'Tunisia': 'TUN',
 'Guam': 'GUM',
 'Laos': 'LAO',
 'Italy': 'ITA',
 'Saint Kitts And Nevis': 'KNA',
 'Tanzania': 'TZA',
 'Somalia': 'SOM',
 'Egypt': 'EGY',
 'France': 'FRA',
 'Colombia': 'COL',

In [25]:
def get_country_code(country_name):
    """Look up the code stored for ``country_name`` in ``country_dict``.

    Raises:
        KeyError: if ``country_name`` is not a key of ``country_dict``.
    """
    country_code = country_dict[country_name]
    return country_code

In [26]:
def check_state_exist(connection, state_name, country_name):
    """Report whether the ``state`` table has a row for this state and country.

    Args:
        connection: Open database connection exposing ``cursor()``.
        state_name: Value compared against ``state.name``.
        country_name: Country name; converted to its code with
            ``get_country_code`` before querying.

    Returns:
        ``True`` if at least one matching row exists, otherwise ``False``.
    """
    # Resolve the code before opening a cursor so that a failed lookup
    # cannot leave a cursor open.
    country_code = get_country_code(country_name)
    check_query = """
        SELECT 1
        FROM state
        WHERE name = %s and country_code = %s
        LIMIT 1;
    """
    cursor = connection.cursor()
    try:
        cursor.execute(check_query, (state_name, country_code))
        return cursor.fetchone() is not None
    finally:
        # Always release the cursor, including when the query raises.
        cursor.close()

In [27]:
def temp_exists(connection, year, country_name):
    """Report whether a country-level temperature row exists for a year.

    Only rows whose ``state_id`` and ``city_id`` are both NULL are considered.

    Args:
        connection: Open database connection exposing ``cursor()``.
        year: Value compared against ``temperature.year``.
        country_name: Country name; converted to its code with
            ``get_country_code`` before querying.

    Returns:
        ``True`` if at least one matching row exists, otherwise ``False``.
    """
    # Resolve the code before opening a cursor so that a failed lookup
    # cannot leave a cursor open.
    country_code = get_country_code(country_name)

    check_query = """
        SELECT 1
        FROM temperature
        WHERE state_id ISNULL AND city_id ISNULL
        AND year = %s
        AND country_code = %s
        
        LIMIT 1;
    """
    cursor = connection.cursor()
    try:
        cursor.execute(check_query, (year, country_code))
        result = cursor.fetchone()
    finally:
        # Close in `finally` so the cursor is released even if the query fails.
        cursor.close()

    return result is not None

In [28]:
def add_country_temp(connection, row):
    """Insert one country-level yearly temperature row unless it already exists.

    The function does not commit; committing is left to the caller.

    Args:
        connection: Open database connection exposing ``cursor()``.
        row: Mapping or Series providing the "Country", "Year",
            "AverageTemperature", "MinimumTemperature" and
            "MaximumTemperature" entries.

    Returns:
        ``1`` when a row was inserted, ``0`` when ``temp_exists`` reported
        that a row for this year and country is already present.
    """
    country = row["Country"]
    year = row["Year"]

    # Check for an existing row first so no cursor is opened when there is
    # nothing to insert.
    if temp_exists(connection, year, country):
        print("temp_exist")
        return 0

    country_code = get_country_code(country)
    # Missing measurements become None so they are inserted as NULL.
    average_temp = check_null(row['AverageTemperature'])
    min_temp = check_null(row['MinimumTemperature'])
    max_temp = check_null(row['MaximumTemperature'])

    insert_query = """
            INSERT INTO temperature (
                year, country_code,
                average_temp, min_temp, max_temp
            )
            VALUES (
               %s, %s,
               %s, %s, %s
            );
        """

    cursor = connection.cursor()
    try:
        cursor.execute(insert_query,
                       (year, country_code,
                        average_temp, min_temp, max_temp))
    finally:
        # Release the cursor on success and on failure; this function is
        # called once per DataFrame row.
        cursor.close()
    return 1
        

In [29]:
# Insert every row of `yearly_temp_df` into the `temperature` table inside a
# single transaction, then report how many rows were actually inserted.
connection = None
try:
    connection = psycopg2.connect(
        dbname=dbname,
        user=user,
        password=password,
        host=host,
        port=port
    )

    total_insert = 0
    for index, row in yearly_temp_df.iterrows():
        # add_country_temp returns 1 for an insert and 0 for a skipped row.
        total_insert += add_country_temp(connection, row)

    # Commit once, after all rows succeeded.
    connection.commit()

    print(f"Data inserted successfully! Total inserts: {total_insert}")

except psycopg2.Error as e:
    # This handler catches connection failures and query failures alike, so
    # the message must not claim the error happened while connecting.
    print(f"PostgreSQL error during country temperature load: {e}")

finally:
    # Always close the connection; closing without a commit discards the
    # uncommitted inserts, so a failed run leaves no partial data behind.
    if connection is not None:
        connection.close()


Data inserted successfully! Total inserts: 40844


In [30]:
# Display the loaded DataFrame.
yearly_temp_df

Unnamed: 0,Year,AverageTemperature,MinimumTemperature,MaximumTemperature,Country
0,1750,13.098,3.600,23.893,Albania
1,1750,11.759,3.313,21.520,Andorra
2,1750,6.984,-3.582,17.247,Austria
3,1750,6.691,-6.297,19.282,Belarus
4,1750,10.341,1.171,19.100,Belgium
...,...,...,...,...,...
40839,2013,25.232,19.885,27.623,Vietnam
40840,2013,27.312,25.594,29.147,Virgin Islands
40841,2013,28.130,21.567,32.325,Yemen
40842,2013,21.196,18.266,23.636,Zambia


In [31]:
# Show only the rows whose "Country" value is exactly "Antarctica".
yearly_temp_df.loc[yearly_temp_df["Country"].eq("Antarctica")]

Unnamed: 0,Year,AverageTemperature,MinimumTemperature,MaximumTemperature,Country
27602,1950,,,,Antarctica
27809,1951,,,,Antarctica
28016,1952,,,,Antarctica
28223,1953,,,,Antarctica
28430,1954,,,,Antarctica
...,...,...,...,...,...
39815,2009,,,,Antarctica
40022,2010,,,,Antarctica
40229,2011,,,,Antarctica
40436,2012,,,,Antarctica
