In [1]:
import os
import pickle
from pathlib import Path
from google.auth.transport.requests import Request
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

# Absolute location of the OAuth client-secrets file (credentials.json),
# resolved relative to the current working directory.
credentials_path = Path('../../credentials.json').resolve()

# The cached token (token.pickle) sits next to credentials.json.
token_path = credentials_path.with_name('token.pickle')

# OAuth scopes requested from Google Drive (read-only here).
# Change these if the notebook ever needs to modify the folder.
SCOPES = [
    'https://www.googleapis.com/auth/drive.readonly',
]

# Authenticate and create the service
def authenticate():
    """Build a Google Drive v3 client, reusing cached credentials when possible.

    Credentials are read from ``token_path`` if that file exists. When they
    are missing or not valid they are either refreshed (expired credentials
    that carry a refresh token) or obtained through the local-server OAuth
    flow driven by ``credentials_path`` and ``SCOPES``. Whenever new or
    refreshed credentials are produced they are written back to
    ``token_path``.

    Returns:
        The object returned by ``build('drive', 'v3', credentials=...)``.
    """
    cached = None
    if token_path.exists():
        # NOTE: pickle.load executes arbitrary code from the file; only use a
        # token file that this notebook wrote itself.
        with token_path.open('rb') as token_file:
            cached = pickle.load(token_file)

    if not (cached and cached.valid):
        refreshable = cached and cached.expired and cached.refresh_token
        if refreshable:
            cached.refresh(Request())
        else:
            # No usable credentials: run the interactive login on a free port.
            oauth_flow = InstalledAppFlow.from_client_secrets_file(
                str(credentials_path), SCOPES)
            cached = oauth_flow.run_local_server(port=0)

        # Persist whatever we ended up with so the next run can skip the login.
        with token_path.open('wb') as token_file:
            pickle.dump(cached, token_file)

    return build('drive', 'v3', credentials=cached)

# Notebook-wide Drive client. list_files_in_folder and download_file use it as
# the default value of their `service` parameter, which Python binds when those
# functions are defined - re-run their cells after re-authenticating.
service = authenticate()


In [2]:
# Function to list files in a folder
def list_files_in_folder(folder_id, service = service):
    """Return id/name metadata for every CSV file directly inside a Drive folder.

    Args:
        folder_id: ID of the Drive folder to search. Only files whose parent
            is this folder are returned (files in nested sub-folders are not).
            A falsy value skips the folder filter and lists every CSV the
            credentials can see.
        service: Drive v3 client used to issue the request.

    Returns:
        A list of dicts, each with the keys ``id`` and ``name``. Empty when
        nothing matches.
    """
    query = "mimeType='text/csv'"
    if folder_id:
        # Restrict the search to the requested folder; without this clause the
        # query matches CSV files anywhere in the account.
        query = f"'{folder_id}' in parents and {query}"

    files = []
    page_token = None
    while True:
        # files.list is paginated: keep requesting until no nextPageToken is
        # returned, otherwise only the first page of results would be seen.
        response = service.files().list(
            q=query,
            fields="nextPageToken, files(id, name)",
            pageToken=page_token,
        ).execute()
        files.extend(response.get('files', []))
        page_token = response.get('nextPageToken')
        if not page_token:
            break
    return files
# Google Drive folder ID handed to list_files_in_folder
folder_id = '1sIxF_whGAXBwk5rpXcN53TPJ2JOxAcm8'

# File-metadata dicts (each with 'id' and 'name') that the download cells iterate over
files = list_files_in_folder(folder_id)

In [3]:
import io
import pandas as pd
from googleapiclient.http import MediaIoBaseDownload

# Function to download a file using pandas.read_csv and the pyarrow engine
def download_file(file_id, file_name, service = service):
    """Download one Drive file into memory and parse it as a CSV.

    Args:
        file_id: Drive ID of the file to fetch.
        file_name: Name used only in the progress and status messages.
        service: Drive v3 client used to issue the download request.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` if ``pd.read_csv`` raised
        (the error is printed rather than propagated).
    """
    # The whole file is buffered in memory rather than written to disk.
    buffer = io.BytesIO()
    media_request = service.files().get_media(fileId=file_id)
    downloader = MediaIoBaseDownload(buffer, media_request)

    # Pull chunks until the downloader reports completion.
    while True:
        status, done = downloader.next_chunk()
        print(f"Downloading {file_name} - {int(status.progress() * 100)}%")
        if done is not False:
            break

    # Rewind so read_csv starts from the first byte.
    buffer.seek(0)

    print(f"Reading {file_name} into pandas DataFrame using pyarrow engine...")
    try:
        frame = pd.read_csv(
            buffer,
            dtype={'lat':'Float64','long':'Float64'},
            engine='pyarrow',
            na_values = ' ',
            keep_default_na=True,
        )
        print(f"Loaded {file_name} successfully into a DataFrame.")
        return frame
    except Exception as err:
        # Best effort: report the failure and let the caller carry on.
        print(f"Error reading {file_name}: {err}")
        return None

In [4]:
# Substrings that must all appear in a file name for it to be selected
matches = ['Patna','static']

# Download every Patna static-sensor CSV, keyed by file name
patna_frames = {}
for file in files:
    if not all(m in file['name'] for m in matches):
        continue
    patna_frames[file['name']] = download_file(file_id = file['id'], file_name = file['name'])

# Stack the per-file frames into one DataFrame, then drop the per-file dict
patna_static = pd.concat(list(patna_frames.values()))
del patna_frames

Downloading 3_vayu_Patna_static_sensor_data_September_2024.csv - 100%
Reading 3_vayu_Patna_static_sensor_data_September_2024.csv into pandas DataFrame using pyarrow engine...
Loaded 3_vayu_Patna_static_sensor_data_September_2024.csv successfully into a DataFrame.
Downloading 4_vayu_Patna_static_sensor_data_October_2024.csv - 100%
Reading 4_vayu_Patna_static_sensor_data_October_2024.csv into pandas DataFrame using pyarrow engine...
Loaded 4_vayu_Patna_static_sensor_data_October_2024.csv successfully into a DataFrame.
Downloading 6_vayu_Patna_static_sensor_data_December_2024.csv - 100%
Reading 6_vayu_Patna_static_sensor_data_December_2024.csv into pandas DataFrame using pyarrow engine...
Loaded 6_vayu_Patna_static_sensor_data_December_2024.csv successfully into a DataFrame.
Downloading 1_vayu_Patna_static_sensor_data_July_2024.csv - 100%
Reading 1_vayu_Patna_static_sensor_data_July_2024.csv into pandas DataFrame using pyarrow engine...
Loaded 1_vayu_Patna_static_sensor_data_July_2024.csv

In [17]:
from vayu_gnn.dbx.dbx_config import dbx_helper
# Median latitude/longitude per device, ignoring rows with a missing coordinate.
# Result shape: {device_name: {'lat': ..., 'long': ...}}
p_dict = (
    patna_static
    .dropna(subset = ['lat','long'])
    .groupby('device_name')
    .agg({'lat':'median', 'long':'median'})
    .to_dict(orient='index')
)

# Hand the mapping to the project's dbx_helper for pickling
dbx_helper.write_pickle(
    p_dict,
    dbx_helper.clean_input_path,
    'map_device_to_latlon',
    'Patna_static.pickle',
)

File 'Patna_static.pickle' successfully uploaded to Dropbox path: '/input/clean/map_device_to_latlon/Patna_static.pickle'


In [7]:
# Substrings that must all appear in a file name for it to be selected
matches = ['Gurugram','static']

# Download every Gurugram static-sensor CSV, keyed by file name
gurugram_frames = {}
for file in files:
    if not all(m in file['name'] for m in matches):
        continue
    gurugram_frames[file['name']] = download_file(file_id = file['id'], file_name = file['name'])

# Stack the per-file frames into one DataFrame, then drop the per-file dict
gurugram_static = pd.concat(list(gurugram_frames.values()))
del gurugram_frames

Downloading vayu_Gurugram_static_sensor_data_September_2024.csv - 100%
Reading vayu_Gurugram_static_sensor_data_September_2024.csv into pandas DataFrame using pyarrow engine...
Loaded vayu_Gurugram_static_sensor_data_September_2024.csv successfully into a DataFrame.
Downloading vayu_Gurugram_static_sensor_data_October_2024.csv - 100%
Reading vayu_Gurugram_static_sensor_data_October_2024.csv into pandas DataFrame using pyarrow engine...
Loaded vayu_Gurugram_static_sensor_data_October_2024.csv successfully into a DataFrame.
Downloading vayu_Gurugram_static_sensor_data_November_2024.csv - 100%
Reading vayu_Gurugram_static_sensor_data_November_2024.csv into pandas DataFrame using pyarrow engine...
Loaded vayu_Gurugram_static_sensor_data_November_2024.csv successfully into a DataFrame.
Downloading vayu_Gurugram_static_sensor_data_July_2024.csv - 100%
Reading vayu_Gurugram_static_sensor_data_July_2024.csv into pandas DataFrame using pyarrow engine...
Loaded vayu_Gurugram_static_sensor_data_J

In [None]:
# Omit TARA039 from the Gurugram devices: its reported coordinates (about 26.51 N, 80.27 E) are not in Gurugram.

In [18]:
# Tally the distinct (lat, long) positions reported by TARA039, rounded to 4 decimals
(
    gurugram_static
    .query('device_name == "TARA039"')
    .round(4)
    .value_counts(['lat','long'])
    .reset_index()
    .sort_values('lat')
)

Unnamed: 0,lat,long,count
18,26.5086,80.2706,1
12,26.5087,80.271,4
19,26.5087,80.2711,1
0,26.5088,80.271,503
22,26.5088,80.2713,1
16,26.5088,80.2709,3
9,26.5088,80.2712,13
5,26.5088,80.2711,138
15,26.5089,80.2713,3
6,26.5089,80.2712,56


In [19]:
# Show the concatenated Patna frame (the rendered output is truncated to head and tail rows)
patna_static

Unnamed: 0,id,device_name,lat,long,pm_25,pm_10,no2,co,co2,ch4,temp,rh,data_created_time
0,3348730,TARA086,25.596664,85.226929,53.1,71.7,21.03,0.71,,,35.5,65.0,2024-09-01 00:00:00+00:00
1,3348732,TARA077,25.589079,85.236862,41.2,55.6,6.22,0.98,,,36.5,58.0,2024-09-01 00:00:00+00:00
2,3348733,TARA017,25.622417,85.091667,63.0,85.2,282.57,,447.0,,35.1,62.0,2024-09-01 00:00:00+00:00
3,3348734,TARA064,25.631643,85.115936,64.8,87.6,185.00,1.18,,,36.1,62.0,2024-09-01 00:00:00+00:00
4,3348737,TARA047,25.586788,85.249763,44.7,60.4,816.00,,453.0,,34.5,69.0,2024-09-01 00:00:00+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1012040,9367112,TARA026,25.636164,85.104828,303.9,431.7,4.03,1.50,,,25.8,36.0,2024-11-30 23:59:00+00:00
1012041,9367113,TARA032,25.619493,85.126152,210.9,247.2,4.20,,456.0,,28.0,38.0,2024-11-30 23:59:00+00:00
1012042,9367116,TARA073,25.580191,85.190178,180.0,221.1,948.00,1.17,,,27.1,40.0,2024-11-30 23:59:00+00:00
1012043,9367117,TARA028,25.586571,85.044273,23.3,31.5,10.43,,,1.95,21.6,56.0,2024-11-30 23:59:00+00:00


In [14]:
# Median latitude/longitude per Gurugram device (rows with a missing
# coordinate dropped), ordered west-to-east by longitude
(
    gurugram_static
    .dropna(subset = ['lat','long'])
    .groupby('device_name')
    .agg({'lat':'median', 'long':'median'})
    .sort_values('long')
)

Unnamed: 0_level_0,lat,long
device_name,Unnamed: 1_level_1,Unnamed: 2_level_1
TARA037,28.397772,76.928551
TARA075,28.405016,76.94223
TARA038,28.464142,76.97438
TARA033,28.401331,76.986786
TARA004,28.4743,76.996872
TARA029,28.443123,76.999329
TARA024,28.401754,77.005295
TARA046,28.503603,77.007599
TARA007,28.475147,77.010078
TARA051,28.475126,77.010086
