In [1]:
import os

import pandas as pd
import psycopg2

# Connect to postgres
# Each setting can be overridden through an environment variable so that real
# credentials never need to be written into the notebook. The fallbacks are
# the local development values this notebook was originally written with.
dbname = os.environ.get("WEATHER_DB_NAME", "weather")
user = os.environ.get("WEATHER_DB_USER", "root")
password = os.environ.get("WEATHER_DB_PASSWORD", "root")
host = os.environ.get("WEATHER_DB_HOST", "localhost")
# Kept as a string, exactly as before, and passed as-is to psycopg2.connect().
port = os.environ.get("WEATHER_DB_PORT", "5401")

In [2]:
# Auxiliary settings; neither is referenced by the cells visible in this
# notebook section.
country_col_name = 'Country Code'
year_range = range(1960, 2014)

# Load the global yearly temperature CSV into a DataFrame.
file_path = '../../dataset-Climate/GlobalYearlyTemp.csv'
yearly_temp_df = pd.read_csv(file_path)

In [3]:
# Function to check if data already exists in the database
def data_exists(connection, year):
    """Return True if a global temperature row is already stored for ``year``.

    A row counts as "global" when its ``city_id``, ``state_id`` and
    ``country_code`` columns are all NULL.

    Args:
        connection: Open database connection exposing ``cursor()``
            (a psycopg2 connection in this notebook).
        year: Year to look up. NumPy scalars are converted to the equivalent
            native Python number before being bound to the query.

    Returns:
        bool: True when at least one matching row exists, otherwise False.
    """
    # Values taken from a DataFrame row are NumPy scalars; .item() yields the
    # plain Python int/float that the database driver knows how to adapt.
    year_param = year.item() if hasattr(year, "item") else year

    # The year is passed as a bound parameter rather than interpolated into
    # the SQL text, so the driver handles quoting.
    check_query = """
        SELECT 1
        FROM temperature
        WHERE year = %s
        AND (city_id IS NULL)
        AND (state_id IS NULL)
        AND (country_code IS NULL);
    """

    cursor = connection.cursor()
    try:
        cursor.execute(check_query, (year_param,))
        return cursor.fetchone() is not None
    finally:
        # Release the cursor even when the query raises.
        cursor.close()

In [4]:
def check_null(amount):
    """Render ``amount`` as SQL literal text.

    Missing values (``None`` / NaN) become the string ``'NULL'``; any other
    value is returned as ``str(amount)``.
    """
    # pd.isnull is an alias of pd.isna, so a single check covers both.
    if pd.isna(amount):
        return 'NULL'
    return str(amount)

In [5]:
# Insert the yearly global temperatures from yearly_temp_df into the
# `temperature` table, skipping years for which data_exists() already
# reports a row.

# Values are bound as parameters (%s) instead of being formatted into the SQL
# text, so the driver handles quoting and NULLs.
insert_query = """
    INSERT INTO temperature (
        year, average_temp, min_temp, max_temp,
        land_ocean_average_temperature, land_ocean_min_temperature, land_ocean_max_temperature
    )
    VALUES (%s, %s, %s, %s, %s, %s, %s);
"""

# DataFrame columns, in the same order as the temperature columns above.
temperature_columns = [
    'AverageTemperature',
    'MinimumTemperature',
    'MaximumTemperature',
    'LandOceanAverageTemperature',
    'LandOceanMinimumTemperature',
    'LandOceanMaximumTemperature',
]

connection = None
insert_count = 0
try:
    # Establish a connection to the PostgreSQL database
    connection = psycopg2.connect(
        dbname=dbname,
        user=user,
        password=password,
        host=host,
        port=port
    )

    # Create a cursor object to interact with the database
    cursor = connection.cursor()
    try:
        for _, row in yearly_temp_df.iterrows():
            # iterrows() yields NumPy scalars; convert to native Python types
            # so the driver can adapt them.
            year = int(row['Year'])

            # Check if data already exists
            if data_exists(connection, year):
                continue

            # NaN means "no measurement" and is stored as SQL NULL.
            temperatures = [
                None if pd.isna(row[column]) else float(row[column])
                for column in temperature_columns
            ]
            cursor.execute(insert_query, [year, *temperatures])
            insert_count += 1  # Increment the insert count
    finally:
        cursor.close()

    # Commit only after every row has been processed.
    connection.commit()

    print(f"Data inserted successfully! Total inserts: {insert_count}")

except psycopg2.Error as e:
    if connection is None:
        print(f"Error connecting to PostgreSQL: {e}")
    else:
        # Nothing was committed; closing the connection below discards the
        # pending inserts.
        print(f"PostgreSQL error while inserting data (no changes committed): {e}")

finally:
    # Always release the connection, including on failure.
    if connection is not None:
        connection.close()


Data inserted successfully! Total inserts: 266


In [6]:
# Show the loaded temperature DataFrame as this cell's output.
yearly_temp_df

Unnamed: 0,Year,AverageTemperature,MinimumTemperature,MaximumTemperature,LandOceanAverageTemperature,LandOceanMinimumTemperature,LandOceanMaximumTemperature
0,1750,8.719,2.772,15.868,,,
1,1751,7.976,0.963,14.405,,,
2,1752,5.780,0.348,8.265,,,
3,1753,8.388,0.559,15.092,,,
4,1754,8.469,-1.249,14.681,,,
...,...,...,...,...,...,...,...
261,2011,9.516,3.282,15.482,15.770,13.928,17.568
262,2012,9.507,3.157,15.076,15.802,13.859,17.450
263,2013,9.607,3.685,15.003,15.854,14.117,17.503
264,2014,9.571,3.500,14.875,15.913,14.136,17.607
