In [1]:
import pandas as pd
csv_file = './GlobalLandTemperaturesByCity.csv'

data = []
df = pd.read_csv(csv_file)




In [7]:
df = df.dropna(subset=['AverageTemperature'])

# Convert the 'dt' column to datetime
df['dt'] = pd.to_datetime(df['dt'])

# Extract the year from the 'dt' column
df['Year'] = df['dt'].dt.year

# Filter rows to include only years starting from 1750
df = df[df['Year'] >= 1750]

# Group by 'City', 'Latitude', 'Longitude', and 'Year' and calculate the mean temperature
yearly_avg_temp = df.groupby(['City', 'Latitude', 'Longitude', 'Year'])['AverageTemperature'].mean().reset_index()

print(yearly_avg_temp)

            City Latitude Longitude  Year  AverageTemperature
0       A Coruña   42.59N     8.73W  1750           13.489273
1       A Coruña   42.59N     8.73W  1751           13.698500
2       A Coruña   42.59N     8.73W  1752           10.771333
3       A Coruña   42.59N     8.73W  1753           13.151833
4       A Coruña   42.59N     8.73W  1754           13.221000
...          ...      ...       ...   ...                 ...
691126    Ürümqi   44.20N    87.20E  2009            7.287417
691127    Ürümqi   44.20N    87.20E  2010            6.650083
691128    Ürümqi   44.20N    87.20E  2011            6.806083
691129    Ürümqi   44.20N    87.20E  2012            6.600167
691130    Ürümqi   44.20N    87.20E  2013            9.472000

[691131 rows x 5 columns]


In [8]:
city_temp_extremes = yearly_avg_temp.groupby(['City', 'Latitude', 'Longitude'])['AverageTemperature'].agg(['max', 'min']).reset_index()

print(city_temp_extremes)

          City Latitude Longitude        max        min
0     A Coruña   42.59N     8.73W  15.023250  10.771333
1       Aachen   50.63N     6.34E  10.654667   5.275333
2      Aalborg   57.05N    10.33E   9.673833   4.090833
3          Aba    5.63N     8.07E  27.696667  24.926667
4       Abadan   29.74N    48.00E  32.877000  12.220000
...        ...      ...       ...        ...        ...
3505     Århus   57.05N    10.33E   9.673833   4.090833
3506     Çorlu   40.99N    27.69E  15.796875   9.117833
3507     Çorum   40.99N    34.08E  12.800375   7.888000
3508   Öskemen   50.63N    82.39E   4.595500  -0.989000
3509    Ürümqi   44.20N    87.20E   9.472000   4.331800

[3510 rows x 5 columns]


In [11]:
# Function to convert latitude and longitude to numeric
def convert_coordinates(coord):
    if coord[-1] in ['N', 'E']:
        return float(coord[:-1])
    elif coord[-1] in ['S', 'W']:
        return -float(coord[:-1])
    else:
        return float(coord)

# Convert 'Latitude' and 'Longitude' to numeric values
city_temp_extremes['Latitude'] = city_temp_extremes['Latitude'].apply(convert_coordinates)
city_temp_extremes['Longitude'] = city_temp_extremes['Longitude'].apply(convert_coordinates)

# Add the TemperatureDifference column
city_temp_extremes['TemperatureDifference'] = city_temp_extremes['max'] - city_temp_extremes['min']

# Drop the 'max' and 'min' columns
final_df = city_temp_extremes.drop(columns=['max', 'min'])

# Save the final DataFrame to a CSV file
final_df.to_csv('city_temp_extremes.csv', index=False)

print(final_df)


          City  Latitude  Longitude  TemperatureDifference
0     A Coruña     42.59      -8.73               4.251917
1       Aachen     50.63       6.34               5.379333
2      Aalborg     57.05      10.33               5.583000
3          Aba      5.63       8.07               2.770000
4       Abadan     29.74      48.00              20.657000
...        ...       ...        ...                    ...
3505     Århus     57.05      10.33               5.583000
3506     Çorlu     40.99      27.69               6.679042
3507     Çorum     40.99      34.08               4.912375
3508   Öskemen     50.63      82.39               5.584500
3509    Ürümqi     44.20      87.20               5.140200

[3510 rows x 4 columns]


In [3]:
import pandas as pd

# List of CSV files to read
csv_files = ['1980.csv', '1990.csv', '2000.csv', '2010.csv','2020.csv']

# Empty list to store dataframes
dfs = []

weather_code_map = {
    0: 'Clear sky',
    1: 'Clear sky', 2: 'Cloudy', 3: 'Cloudy',
    45: 'Fog', 48: 'Fog',
    51: 'Drizzle', 53: 'Drizzle', 55: 'Drizzle', 56: 'Drizzle', 57: 'Drizzle',
    61: 'Moderate Rain', 63: 'Moderate Rain', 65: 'Moderate Rain', 66: 'Moderate Rain', 67: 'Moderate Rain',
    71: 'Snow fall', 73: 'Snow fall', 75: 'Snow fall', 77: 'Snow fall',
    80: 'Rain showers', 81: 'Rain showers', 82: 'Rain showers', 85: 'Rain showers', 86: 'Rain showers',
    95: 'Thunderstorm', 96: 'Thunderstorm', 99: 'Thunderstorm'
}


# Iterate over the list of CSV files
for file in csv_files:
    # Read the CSV file into a DataFrame
    df = pd.read_csv(file)
    
    # Keep only columns: city_name, datetime, temperature, weather_code, and precipitation
    df = df[['city_name', 'datetime', 'temperature_2m_mean', 'weather_code', 'precipitation_sum','sunshine_duration','shortwave_radiation_sum','snowfall_sum']]

    # Convert datetime column to pandas datetime format
    df['datetime'] = pd.to_datetime(df['datetime'], format='%Y-%m-%d')
    
    # Extract year and month from the datetime column
    df['year'] = df['datetime'].dt.to_period('Y')

    df['weather_code'] = df['weather_code'].map(weather_code_map).fillna('Unknown') 
    
    # Append the DataFrame to the list
    dfs.append(df)

# Concatenate all DataFrames in the list
final_df = pd.concat(dfs, ignore_index=True)

# Group by city_name and year_month to calculate the mean temperature, mode of weather_code, and mean precipitation
final_df_grouped = final_df.groupby(['city_name', 'year']).agg(
    temperature=('temperature_2m_mean', 'mean'),
    precipitation=('precipitation_sum', 'mean'),
    snowfall=('snowfall_sum', 'mean'),
    sunshine = ('sunshine_duration', 'mean'),
    shortwave_radiation=('shortwave_radiation_sum','mean'),
).reset_index()

final_df_grouped['temperature'] = final_df_grouped['temperature'].round(2)
final_df_grouped['precipitation'] = final_df_grouped['precipitation'].round(2)
final_df_grouped['snowfall'] = final_df_grouped['snowfall'].round(2)
final_df_grouped['sunshine']/=3600
final_df_grouped['sunshine'] = final_df_grouped['sunshine'].round(2)
final_df_grouped['shortwave_radiation'] = final_df_grouped['shortwave_radiation'].round(2)

# Write the resulting DataFrame to a new CSV file
final_df_grouped.to_csv('datasetNEW.csv', index=False)

print("Average temperature, precipitation, and weather code mode per month for each city has been saved to 'average_temperature_precipitation_weather_code.csv'.")


Average temperature, precipitation, and weather code mode per month for each city has been saved to 'average_temperature_precipitation_weather_code.csv'.


In [None]:
csv_files = ['1980.csv', '1990.csv', '2000.csv', '2010.csv','2020.csv']
dfs = []
for file in csv_files:
    # Read the CSV file into a DataFrame
    df = pd.read_csv(file)
    
    # Keep only columns: city_name, datetime, temperature, weather_code, and precipitation
    df = df[['city_name', 'datetime', 'temperature_2m_mean', 'precipitation_sum','daylight_duration','shortwave_radiation_sum','et0_fao_evapotranspiration']]

    # Convert datetime column to pandas datetime format
    df['datetime'] = pd.to_datetime(df['datetime'], format='%Y-%m-%d')
    
    # Extract year and month from the datetime column
    df['year'] = df['datetime'].dt.to_period('Y')
    
    # Append the DataFrame to the list
    dfs.append(df)

final_df = pd.concat(dfs, ignore_index=True)

    df['mean_temperature'] = df['temperature_2m_mean'].agg('mean')

    df['mean_precipitation'] = df['precipitation_sum'].agg('mean')
    df['mean_daylight_duration'] = df['daylight_duration'].agg('mean')
    df['mean_shortwave_radiation_sum'] = df['shortwave_radiation_sum'].agg('mean')
    df['mean_evapotranspiration'] = df['et0_fao_evapotranspiration'].agg('mean')
    

ModuleNotFoundError: No module named 'dash'

In [None]:
temp
precipitation
daylight
shortwave_radiation_sum
et0_fao_evapotranspiration