In [1]:
import json
import csv
import pandas as pd
import requests

In [2]:
def readfile(filepath= "./data_json/cap[665].json"):
    """Load a JSON document from disk and return the decoded object.

    Parameters
    ----------
    filepath:
        Path of the JSON file to read.

    Returns
    -------
    The Python object produced by ``json.load`` (a ``dict`` for the payloads
    consumed by ``ParseFile``).

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8: the platform default encoding (for example
    # cp1252 on Windows) would garble or reject non-ASCII text.
    with open(filepath, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)


In [33]:
from pydantic import BaseModel

class City(BaseModel):
    """Location record for one city, validated by pydantic.

    ``ParseFile.parse_area`` builds it from the flattened ``nearest_area``
    entry of a payload plus the ``zone`` and ``utcOffset`` values of the
    payload's time-zone entry.
    """
    name: str
    region: str
    country: str
    latitude: float
    longitude: float
    # Offset from UTC; the unit is not shown in this file -- presumably hours, TODO confirm.
    utcOffset: float = 1.0
    # Time-zone identifier; falls back to "Africa/Lagos" when none is supplied.
    zone: str = "Africa/Lagos"
    


class Astronomy(BaseModel):
    """Sun and moon record for one city and date, validated by pydantic.

    ``city`` and ``date`` default to empty strings; ``ParseFile.parse_weather``
    assigns them after the remaining fields have been parsed.
    """
    city: str = ""
    date: str = ""
    # Rise/set values are kept as strings; their exact format is not shown here.
    sunrise: str
    sunset: str
    moonrise: str
    moonset: str
    moon_phase: str
    # Presumably a percentage -- TODO confirm against the data source.
    moon_illumination: int
    
    
class Weather(BaseModel):
    """Temperature, snow, sun and UV summary for one city and date.

    ``ParseFile.parse_weather`` fills ``city`` and ``date`` and parses the
    remaining fields from the first ``weather`` entry of a payload.
    """
    city: str = ""
    date: str = ""  
    # The C/F suffixes presumably denote Celsius and Fahrenheit -- TODO confirm.
    maxtempC: float
    maxtempF: float
    mintempC: float
    mintempF: float
    avgtempC: float
    avgtempF: float
    totalSnow_cm: float
    sunHour: float
    uvIndex: float
    

In [34]:
from typing import Optional
class ParseFile:
    """Extract city, weather and astronomy records from one weather payload.

    The payload must carry a ``'data'`` mapping holding ``'time_zone'``,
    ``'nearest_area'`` and ``'weather'`` lists; only the first entry of each
    list is read. The payload itself is never modified.
    """

    def __init__(self, json_handler: Optional[dict]):
        """Store the payload and prepare empty result caches.

        Parameters
        ----------
        json_handler:
            Either the decoded payload (a ``dict``) or a zero-argument
            callable returning it, such as ``readfile``.
        """
        # isinstance (rather than an exact type comparison) lets dict
        # subclasses be used directly instead of being called like a loader.
        if isinstance(json_handler, dict):
            json_file = json_handler
        else:
            json_file = json_handler()

        self.json_data = json_file.get('data')
        self.time_zone = self.json_data['time_zone'][0]
        # Declared up front so parse_weather can tell whether the city name
        # has been resolved yet instead of failing with AttributeError.
        self.city: Optional[str] = None
        self.weather: Optional[Weather] = None
        self.astronomy: Optional[Astronomy] = None
        self.hourly: Optional[dict] = None

    def _area_info(self) -> dict:
        """Flatten the first 'nearest_area' entry and remember the city name."""
        area = self.json_data['nearest_area'][0]

        area_info = {}
        for key, value in area.items():
            # List-valued fields are unwrapped to the 'value' of their first item.
            if isinstance(value, list):
                value = value[0]['value']
            # The City model calls this field 'name'.
            area_info['name' if key == 'areaName' else key] = value

        self.city = area_info['name']
        return area_info

    def parse_area(self) -> City:
        """Build the ``City`` record for the payload.

        Returns
        -------
        City
            The flattened nearest-area fields combined with the ``zone`` and
            ``utcOffset`` of the payload's time-zone entry.

        Side effects
        ------------
        Sets ``self.city`` to the area name.
        """
        area_info = self._area_info()
        area_info['zone'] = self.time_zone['zone']
        area_info['utcOffset'] = self.time_zone['utcOffset']
        return City.parse_obj(area_info)

    def parse_weather(self) -> tuple:
        """Build the weather, astronomy and hourly records for the payload.

        The result is computed once and cached on the instance. The city name
        is resolved on demand, so this method may be called before
        ``parse_area``.

        Returns
        -------
        tuple
            ``(Weather, Astronomy, dict)``; the dict is a copy of the first
            hourly entry with ``'city'`` and ``'time'`` set.
        """
        if self.weather is None or self.astronomy is None:
            if self.city is None:
                self._area_info()
            local_time = self.time_zone['localtime']

            # Work on shallow copies so the caller's payload keeps its
            # 'date', 'hourly' and 'astronomy' entries untouched.
            weather_dict = dict(self.json_data.get('weather')[0])

            hourly = dict(weather_dict.pop('hourly')[0])
            hourly['city'] = self.city
            hourly['time'] = local_time

            astronomy_dict = weather_dict.pop('astronomy')[0]
            astronomy = Astronomy.parse_obj(astronomy_dict)
            astronomy.city = self.city
            astronomy.date = local_time

            # The payload's own 'date' is replaced by the local time.
            weather_dict['city'] = self.city
            weather_dict['date'] = local_time

            self.weather = Weather.parse_obj(weather_dict)
            self.astronomy = astronomy
            self.hourly = hourly

        return self.weather, self.astronomy, self.hourly

                
            
            
            

In [46]:

from datetime import datetime
import shutil
import os


def to_csv(filename: str, data):
    """Append one record to a CSV file in the current hour's output directory.

    The directory is ``./weather<YYYY-MM-DD-HH>`` and is created on first use.
    Existing files in it are kept, so successive calls accumulate rows
    instead of wiping out what earlier calls wrote.

    Parameters
    ----------
    filename:
        Name of the CSV file inside the output directory.
    data:
        Mapping of column name to value; its keys become the CSV header.
    """
    # %H is the zero-padded hour; the previous %h is not a portable directive.
    dirname = "./weather" + datetime.now().strftime("%Y-%m-%d-%H")
    os.makedirs(dirname, exist_ok=True)

    filepath = dirname + "/" + filename
    # Emit the header only once, when the file is new or still empty.
    needs_header = not os.path.isfile(filepath) or os.path.getsize(filepath) == 0

    # newline='' lets the csv module control line endings itself.
    with open(filepath, 'a', newline='', encoding='utf-8') as fp:
        writer = csv.DictWriter(fp, fieldnames=list(data))
        if needs_header:
            writer.writeheader()
        writer.writerow(data)

In [47]:
from data_api import gen_state

def process(capitals_csv: str = "list_of_capitals.csv", column: str = "Capital"):
    """Run the extract / transform / load pipeline for every listed capital.

    Parameters
    ----------
    capitals_csv:
        CSV file listing the capitals; defaults to ``list_of_capitals.csv``.
    column:
        Column of that file holding the capital names.

    For each payload produced by ``gen_state`` the city, weather, astronomy
    and hourly records are parsed and each is written with ``to_csv``.
    """
    # dropna() discards every row that has a missing value in any column.
    state = pd.read_csv(capitals_csv).dropna()
    capitals = state[column].values

    # Extract -- presumably one payload per capital; confirm against data_api.
    for payload in gen_state(capitals):

        # Transform
        parser = ParseFile(json_handler=payload)
        city = parser.parse_area()
        weather, astronomy, hourly = parser.parse_weather()

        # Load (to csv)
        records = (
            ('city.csv', city.dict()),
            ('weather.csv', weather.dict()),
            ('astronomy.csv', astronomy.dict()),
            ('metadata.csv', hourly),
        )
        for filename, record in records:
            to_csv(filename, record)




In [48]:
# Run the whole pipeline; this calls gen_state and writes CSV files via to_csv.
process()

In [45]:
import os
import shutil

# Scratch cleanup: remove ./testdir if a previous experiment left it behind.
# The existence check keeps this cell from raising FileNotFoundError on a
# fresh run, while real deletion failures (e.g. permissions) still surface.
if os.path.isdir('./testdir'):
    shutil.rmtree('./testdir')