In [1]:
%matplotlib inline
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
from shapely import wkt
import boto3
import botocore
# import datetime
import matplotlib.pyplot as plt
import os.path
import xarray as xr
import folium
import folium.plugins as plugins
import numpy as np
from datetime import datetime  # Import the datetime class
# import datetime


# We have our data in the df
helena = pd.read_csv('data/helena_tehsils.csv')

# view and merge files
helena = pd.read_csv('helena_tehsils_correct.csv')
helena['Correct_Tehsil'] = helena['Correct_Tehsil'].apply(lambda x: x.lower())

# Get the polygon shapes
table = gpd.read_file('coordinates/adm3/geoBoundaries-PAK-ADM3.dbf')
df_adm3 = pd.DataFrame(table)

polygons = df_adm3[['shapeName', 'geometry']]
polygons['shapeName'] = polygons['shapeName'].apply(lambda x: x.lower())

# Select Tehsils of interest
df_adm3['shapeName'] = df_adm3['shapeName'].apply(lambda x: x.lower())
common = list(set(helena['Correct_Tehsil']).intersection(df_adm3['shapeName']))
teh_interest = df_adm3[df_adm3['shapeName'].isin(common)]



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  polygons['shapeName'] = polygons['shapeName'].apply(lambda x: x.lower())


In [2]:
# Now we append the points for each tehsil
centroids = pd.read_csv('df_centroids.csv')
# turning into GeoPandasDF
centroids['point'] = centroids['point'].apply(wkt.loads)
centroids = gpd.GeoDataFrame(centroids)
# selecting the points column
points = centroids['point']

In [3]:
# for point in points:
teh_interest['points'] = teh_interest['geometry'].apply(lambda x: [point for point in points if x.contains(point) == True])
teh_interest.at[345, 'points'] = [Point(71.789,30.06)]
teh_interest.loc[teh_interest['shapeName'] == 'jahanian']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  teh_interest['points'] = teh_interest['geometry'].apply(lambda x: [point for point in points if x.contains(point) == True])


Unnamed: 0,shapeName,shapeISO,shapeID,shapeGroup,shapeType,geometry,points
345,jahanian,,9618217B73852408658670,PAK,ADM3,"POLYGON ((71.97218 30.07691, 71.97023 30.07773...",[POINT (71.789 30.06)]


In [4]:
era5_bucket = 'era5-pds'
# Without AWS key
client = boto3.client('s3', config=botocore.client.Config(signature_version=botocore.UNSIGNED))
# Available Years
# paginator = client.get_paginator('list_objects')
# result = paginator.paginate(Bucket=era5_bucket, Delimiter='/')
# for prefix in result.search('CommonPrefixes'):
#     print(prefix.get('Prefix'))

In [5]:
ls = list(zip(teh_interest['shapeName'], teh_interest['geometry'], teh_interest['points']))
ls[0][-1][0].x

72.997855

In [6]:
precip = 'precipitation_amount_1hour_Accumulation'
def kelvin_to_celcius(t):
    return t - 273.15

def kelvin_to_fahrenheit(t):
    return t * 9/5 - 459.67

def process_file(filepath, zipped, var):
    ds = xr.open_dataset(filepath)
    year_month = filepath.split('/')[-1][:6]
    frames = []
    for teh in zipped:
        name = teh[0]
        points = teh[-1]
        results = {'date': year_month, 'teh': name}
        if len(points) > 1:
            for i, point in enumerate(points):
                lon = point.x
                lat = point.y
                ds2 = ds.sel(lon=lon, lat=lat, method='nearest')
                values = ds2[var].values
                average = np.mean(values)
                median = np.median(values)
                std = np.std(values)
                
                results.update({
                    f'Point{i}_average': average, 
                    f'Point{i}_median': median, 
                    f'Point{i}_std': std, 
                    f'Point{i}_lat': lat, 
                    f'Point{i}_lon': lon
                })
        else:
            lon = point.x
            lat = point.y
            ds2 = ds.sel(lon=lon, lat=lat, method='nearest')
            values = ds2[var].values
            average = np.mean(values)
            median = np.median(values)
            std = np.std(values)
            
            results.update({
                f'Point0_average': average, 
                f'Point0_median': median, 
                f'Point0_std': std, 
                f'Point0_lat': lat, 
                f'Point0_lon': lon
            })

        frames.append(pd.DataFrame([results]))  # Constructing the dataframe with a single row
    return pd.concat(frames)


In [7]:
# Downloading variables of interest
air_temp = 'air_temperature_at_2_metres'
air_temp_max = 'air_temperature_at_2_metres_1hour_Maximum'
air_temp_min = 'air_temperature_at_2_metres_1hour_Minimum'
precip = 'precipitation_amount_1hour_Accumulation'

def request_era5(location, var, date):
    files = []
    year = date.strftime('%Y')
    month = date.strftime('%m')
    # s3 file path
    s3_path = '{year}/{month}/data/{var}.nc'
    file_path = 'era5_data/{location}/{year}{month}_{var}.nc'

    s3_data_key = s3_path.format(year=year, month=month, var=var)
    data_file = file_path.format(year=year, month=month, var=var, location=location)
    if not os.path.isfile(data_file): # check if file already exists
        print("Downloading %s from S3..." % s3_data_key)
        client.download_file(era5_bucket, s3_data_key, data_file)
    files.append(data_file)
    return files

# filenames = []
for year in range(2022, 2023):
    frames = []
    for i in range(1, 13):
        date = datetime.date(year,i,1) # update to desired date
        files = request_era5('air_temp', air_temp, date )
        for f in files:
            frames.append(process_file(filepath=f, zipped=ls, var=air_temp))
            # next delete file
            os.remove(f)
        res = pd.concat(frames)
    filename = f'air_temp{year}.csv'
    res.to_csv(filename)
    filenames.append(filename)
        

TypeError: descriptor 'date' for 'datetime.datetime' objects doesn't apply to a 'int' object

In [None]:
df_1 = pd.read_csv('rain_data/precip1981.csv')
# pakistan/rain_data/precip1980.csv
filenames = []
for i in range(1980, 2022):
    filenames.append(f'rain_data/precip{i}.csv')
    
for f in filenames:
    df_2 = pd.read_csv(f)
    df_1 = pd.concat([df_1, df_2])

df_1

Unnamed: 0.1,Unnamed: 0,date,teh,Point0_average,Point0_median,Point0_std,Point0_lat,Point0_lon,Point1_average,Point1_median,...,Point25_average,Point25_median,Point25_std,Point25_lat,Point25_lon,Point26_average,Point26_median,Point26_std,Point26_lat,Point26_lon
0,0,198101,bahawalnagar,2.108338e-05,0.0,0.000139,29.929706,72.997855,1.780192e-05,0.0,...,,,,,,,,,,
1,0,198101,chishtian,1.132104e-05,0.0,0.000069,29.429706,72.747855,1.698156e-05,0.0,...,,,,,,,,,,
2,0,198101,fort abbas,3.773679e-06,0.0,0.000037,28.929706,72.497855,3.937752e-06,0.0,...,,,,,,,,,,
3,0,198101,haroonabad,8.941978e-06,0.0,0.000055,29.179706,72.997855,,,...,,,,,,,,,,
4,0,198101,minchinabad,2.050913e-05,0.0,0.000121,30.179706,73.497855,2.575946e-05,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
475,0,202112,rojhan,5.414409e-06,0.0,0.000054,28.429706,69.747855,4.183862e-06,0.0,...,,,,,,,,,,
476,0,202112,burewala,5.742555e-07,0.0,0.000012,29.929706,72.747855,1.640730e-07,0.0,...,,,,,,,,,,
477,0,202112,mailsi,8.203650e-07,0.0,0.000014,29.929706,71.997855,1.394620e-06,0.0,...,,,,,,,,,,
478,0,202112,vehari,8.203650e-07,0.0,0.000020,29.929706,72.497855,9.024015e-07,0.0,...,,,,,,,,,,


In [None]:
# Change date column
def adjust_date(string):
    if type(string) is str:
        ls = string.split('-')
        print(ls)
        year = int(ls[0])
        month = int(ls[1])
    else:
        year = int(str(string)[:4])
        month = int(str(string)[4:])
    formatted_date = datetime(year, month, 1).strftime('%Y-%m')
    return formatted_date

df_1['date'] = df_1['date'].apply(lambda x: adjust_date(x))

['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '01']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '02']
['1981', '