In [None]:
import os
import pickle
from pathlib import Path
from google.auth.transport.requests import Request
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

# Absolute location of the OAuth client-secrets file (resolved from the
# current working directory).
credentials_path = Path('../../credentials.json').resolve()

# The cached token file sits alongside credentials.json.
token_path = credentials_path.with_name('token.pickle')

# OAuth scopes requested during authorization (read-only Drive access).
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']

# Authenticate and create the service
def authenticate():
    """Return an authorized Google Drive v3 service client.

    Cached credentials are loaded from ``token_path`` when that file exists.
    If they are missing or invalid they are refreshed (when expired and a
    refresh token is available) or obtained through the interactive OAuth
    flow driven by the client secrets at ``credentials_path``. Whenever new
    or refreshed credentials are obtained they are pickled to ``token_path``.

    Returns:
        The object returned by ``build('drive', 'v3', credentials=creds)``.
    """
    # Local import: only needed for the refresh fallback below.
    from google.auth.exceptions import RefreshError

    creds = None
    # The file token.pickle stores the user's access and refresh tokens.
    # SECURITY NOTE: pickle.load can execute arbitrary code. This is only
    # acceptable while token.pickle is a file this function wrote itself;
    # never point token_path at a file from an untrusted source.
    if token_path.exists():
        with token_path.open('rb') as token:
            creds = pickle.load(token)

    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        refreshed = False
        if creds and creds.expired and creds.refresh_token:
            try:
                creds.refresh(Request())
                refreshed = True
            except RefreshError as err:
                # A revoked or expired refresh token cannot be recovered by
                # retrying; fall through to the interactive flow instead of
                # crashing on a stale token.pickle.
                print(f"Token refresh failed ({err}); re-running the authorization flow.")

        if not refreshed:
            flow = InstalledAppFlow.from_client_secrets_file(
                str(credentials_path), SCOPES)
            creds = flow.run_local_server(port=0)

        # Save the credentials for the next run
        with token_path.open('wb') as token:
            pickle.dump(creds, token)

    # Build the service
    service = build('drive', 'v3', credentials=creds)
    return service

# Authenticate once; this client is used as the default `service` argument
# of list_files_in_folder and download_file.
service = authenticate()


Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=26632814936-8m33q1kq9qk2ba4utp1iaj89ovr04kmh.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A51252%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.readonly&state=RZZLqNl3tneFqDsMC4AbjJq4jD6BIS&access_type=offline


In [4]:
# Function to list files in a folder
def list_files_in_folder(folder_id, service = service):
    """List the CSV files that are direct children of a Drive folder.

    Args:
        folder_id: ID of the Drive folder to search. If falsy, the folder
            restriction is omitted and every visible CSV file is listed.
        service: Drive v3 service client; defaults to the module-level
            ``service`` as bound when this function was defined.

    Returns:
        A list of dicts, each with ``'id'`` and ``'name'`` keys, gathered
        across all result pages.
    """
    query = "mimeType='text/csv'"
    if folder_id:
        # Escape backslashes and single quotes so the ID cannot break out of
        # the quoted literal in the Drive query string.
        safe_id = str(folder_id).replace("\\", "\\\\").replace("'", "\\'")
        query = f"'{safe_id}' in parents and {query}"

    files = []
    page_token = None
    while True:
        # files.list is paginated; follow nextPageToken until it is absent so
        # results beyond the first page are not silently dropped.
        response = service.files().list(
            q=query,
            fields="nextPageToken, files(id, name)",
            pageToken=page_token,
        ).execute()
        files.extend(response.get('files', []))
        page_token = response.get('nextPageToken')
        if not page_token:
            break
    return files
# Drive folder ID handed to list_files_in_folder.
folder_id = '1sIxF_whGAXBwk5rpXcN53TPJ2JOxAcm8'

# Each entry is a dict with 'id' and 'name' keys (the fields requested by
# list_files_in_folder).
files = list_files_in_folder(folder_id)

In [None]:
import io
import pandas as pd
from googleapiclient.http import MediaIoBaseDownload

# Fetch a Drive file into memory and parse it with pandas.read_csv (pyarrow engine)
def download_file(file_id, file_name, service = service):
    """Download a Drive file into memory and parse it as a CSV.

    Args:
        file_id: ID passed to ``service.files().get_media``.
        file_name: Name used only in the progress and status messages.
        service: Drive service client; defaults to the module-level
            ``service`` as bound when this function was defined.

    Returns:
        The parsed DataFrame, or ``None`` if reading the CSV raises.
    """
    # The whole file is held in memory; nothing is written to disk.
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, service.files().get_media(fileId=file_id))

    # Pull chunks until the downloader reports completion.
    finished = False
    while finished is False:
        progress, finished = downloader.next_chunk()
        percent = int(progress.progress() * 100)
        print(f"Downloading {file_name} - {percent}%")

    # Rewind so read_csv starts from the first byte.
    buffer.seek(0)

    print(f"Reading {file_name} into pandas DataFrame using pyarrow engine...")
    column_types = {'lat':'Float64','long':'Float64'}
    try:
        frame = pd.read_csv(
            buffer,
            dtype=column_types,
            engine='pyarrow',
            na_values = ' ',
            keep_default_na=True,
        )
        print(f"Loaded {file_name} successfully into a DataFrame.")
    except Exception as err:
        # Best effort: report the failure and signal it with None.
        print(f"Error reading {file_name}: {err}")
        return None

    return frame

In [None]:
# Substrings that must all appear in a file name for it to be selected.
matches = ['Patna','static']

# Download every matching file (keyed by file name), then stack the results
# into a single DataFrame.
patna_frames = {
    file['name']: download_file(file_id = file['id'], file_name = file['name'])
    for file in files
    if all(m in file['name'] for m in matches)
}
patna_static = pd.concat(list(patna_frames.values()))

Downloading 3_vayu_Patna_static_sensor_data_September_2024.csv - 100%
Reading 3_vayu_Patna_static_sensor_data_September_2024.csv into pandas DataFrame using pyarrow engine...
Loaded 3_vayu_Patna_static_sensor_data_September_2024.csv successfully into a DataFrame.
Downloading 4_vayu_Patna_static_sensor_data_October_2024.csv - 100%
Reading 4_vayu_Patna_static_sensor_data_October_2024.csv into pandas DataFrame using pyarrow engine...
Loaded 4_vayu_Patna_static_sensor_data_October_2024.csv successfully into a DataFrame.
Downloading 6_vayu_Patna_static_sensor_data_December_2024.csv - 100%
Reading 6_vayu_Patna_static_sensor_data_December_2024.csv into pandas DataFrame using pyarrow engine...
Loaded 6_vayu_Patna_static_sensor_data_December_2024.csv successfully into a DataFrame.
Downloading 1_vayu_Patna_static_sensor_data_July_2024.csv - 100%
Reading 1_vayu_Patna_static_sensor_data_July_2024.csv into pandas DataFrame using pyarrow engine...
Loaded 1_vayu_Patna_static_sensor_data_July_2024.csv

In [29]:
# Count the distinct (lat, long) pairs, rounded to 2 decimals, for TARA021.
(
    patna_static
    .query('device_name == "TARA021"')
    [['lat','long']]
    .round(2)
    .value_counts()
)

lat    long 
25.61  85.13    840
Name: count, dtype: int64

In [44]:
# Per-device median and standard deviation of the coordinates, restricted to
# rows with 20 < lat < 30 and 70 < long < 90; show the five devices with the
# largest longitude standard deviation.
(
    patna_static
    .query('lat > 20 & long > 70 & long < 90 & lat < 30 ')
    .groupby('device_name')
    .agg({'lat':['median','std'],'long':['median','std']})
    .sort_values(('long','std'), ascending=False)
    .head()
)

Unnamed: 0_level_0,lat,lat,long,long
Unnamed: 0_level_1,median,std,median,std
device_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
TARA062,25.614405,0.122193,85.104294,0.660378
TARA043,25.600883,0.113949,85.125656,0.612743
TARA099,25.610891,0.080981,85.201241,0.440713
TARA016,25.578136,0.046194,85.05687,0.236627
TARA047,25.586819,0.001401,85.249756,0.16335


In [38]:
# TARA062 rows with 20 < lat < 30 and 80 < long < 90, ordered by longitude.
tara062 = (
    patna_static
    .query('device_name == "TARA062"')
    .query('lat > 20 & long > 80 & long < 90 & lat < 30 ')
    .sort_values('long')
)

In [43]:
# First three and last three rows of the longitude-sorted TARA062 subset.
(
    pd.concat([tara062.head(3), tara062.tail(3)])
    [['id','device_name','lat','long', 'data_created_time']]
)

Unnamed: 0,id,device_name,lat,long,data_created_time
786984,11150681,TARA062,25.614424,80.033333,2024-12-23 21:17:00+00:00
528891,6319645,TARA062,26.508942,80.270897,2024-10-18 17:46:00+00:00
533125,6330318,TARA062,26.50881,80.270912,2024-10-18 20:56:00+00:00
343430,3959560,TARA062,25.61441,85.833336,2024-09-10 22:53:00+00:00
343387,3959467,TARA062,25.61441,85.833336,2024-09-10 22:51:00+00:00
343409,3959516,TARA062,25.61441,85.833336,2024-09-10 22:52:00+00:00


In [42]:
# Show the column labels of the TARA062 subset.
tara062.columns

Index(['id', 'device_name', 'lat', 'long', 'pm_25', 'pm_10', 'no2', 'co',
       'co2', 'ch4', 'temp', 'rh', 'data_created_time'],
      dtype='object')

In [46]:
# Substrings that must all appear in a file name for it to be selected.
matches = ['Gurugram','static']

# Download every matching Gurugram static file (keyed by file name), then
# stack the results into a single DataFrame.
gurugram_frames = {
    file['name']: download_file(file_id = file['id'], file_name = file['name'])
    for file in files
    if all(m in file['name'] for m in matches)
}
g_static = pd.concat(list(gurugram_frames.values()))

Downloading vayu_Gurugram_static_sensor_data_September_2024.csv - 100%
Reading vayu_Gurugram_static_sensor_data_September_2024.csv into pandas DataFrame using pyarrow engine...
Loaded vayu_Gurugram_static_sensor_data_September_2024.csv successfully into a DataFrame.
Downloading vayu_Gurugram_static_sensor_data_October_2024.csv - 100%
Reading vayu_Gurugram_static_sensor_data_October_2024.csv into pandas DataFrame using pyarrow engine...
Loaded vayu_Gurugram_static_sensor_data_October_2024.csv successfully into a DataFrame.
Downloading vayu_Gurugram_static_sensor_data_November_2024.csv - 100%
Reading vayu_Gurugram_static_sensor_data_November_2024.csv into pandas DataFrame using pyarrow engine...
Loaded vayu_Gurugram_static_sensor_data_November_2024.csv successfully into a DataFrame.
Downloading vayu_Gurugram_static_sensor_data_July_2024.csv - 100%
Reading vayu_Gurugram_static_sensor_data_July_2024.csv into pandas DataFrame using pyarrow engine...
Loaded vayu_Gurugram_static_sensor_data_J

In [53]:
# Per-device median and standard deviation of the coordinates, restricted to
# rows with 20 < lat < 30 and 70 < long < 90; show the five devices with the
# largest median longitude.
(
    g_static
    .query('lat > 20 & long > 70 & long < 90 & lat < 30 ')
    .groupby('device_name')
    .agg({'lat':['median','std'],'long':['median','std']})
    .sort_values(('long','median'), ascending=False)
    .head()
)

Unnamed: 0_level_0,lat,lat,long,long
Unnamed: 0_level_1,median,std,median,std
device_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
TARA039,26.508894,0.090027,80.271027,0.149311
TARA078,28.494034,4.7e-05,77.104424,0.001812
TARA023,28.446411,0.000136,77.100098,0.155729
TARA071,28.424295,0.005315,77.089455,0.000105
TARA045,28.498981,0.000796,77.088341,0.053508


In [59]:
# TARA059 rows inside the 20 < lat < 30, 70 < long < 90 box, ordered by
# longitude; display the two lowest-longitude and two highest-longitude rows.
tara059_by_long = (
    g_static
    .query('lat > 20 & long > 70 & long < 90 & lat < 30 & device_name == "TARA059" ')
    .sort_values('long')
)
(
    pd.concat([tara059_by_long.head(2), tara059_by_long.tail(2)])
    [['id','device_name','lat','long', 'data_created_time']]
)

Unnamed: 0,id,device_name,lat,long,data_created_time
700645,9312776,TARA059,28.420063,77.01667,2024-11-30 07:43:00+00:00
700723,9312981,TARA059,28.420063,77.01667,2024-11-30 07:46:00+00:00
385179,6318643,TARA059,26.508913,80.271301,2024-10-18 17:28:00+00:00
342733,6184218,TARA059,26.508934,80.271362,2024-10-16 23:38:00+00:00
