In [1]:
import json
import csv
import pandas as pd
import requests

In [2]:
def readfile(filepath="./data_json/cap[665].json"):
    """Load and decode a JSON file.

    Parameters
    ----------
    filepath : str
        Path of the JSON file to read. Defaults to a sample payload under
        ``./data_json``.

    Returns
    -------
    The decoded JSON document (whatever ``json.load`` produces for the file).

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file content is not valid JSON.
    """
    # JSON text is UTF-8 by specification; do not depend on the platform's
    # default locale encoding when decoding it.
    with open(filepath, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)


In [4]:
def test_func():
    """Record on the function object that it ran, then return a fixed message.

    Side effect: sets ``test_func.has_been_called`` to ``True``.
    """
    message = "This is a test done"
    setattr(test_func, "has_been_called", True)
    return message

In [8]:
# Reset the call flag stored on the function object; test_func() sets it to True when it runs.
test_func.has_been_called = False

In [9]:
test_func.has_been_called

False

In [10]:
from pydantic import BaseModel

class City(BaseModel):
    """Location record for a single city.

    ParseFile.parse_area builds instances of this model from the first
    ``nearest_area`` entry of a payload, renaming ``areaName`` to ``name`` and
    filling ``zone``/``utcOffset`` from the payload's ``time_zone`` entry.
    """
    name: str
    region: str
    country: str
    latitude: float
    longitude: float
    # Fallbacks used only when the input mapping carries no time-zone fields.
    # NOTE(review): presumably chosen for Nigerian cities (UTC+1) — confirm.
    utcOffset: float = 1.0
    zone: str = "Africa/Lagos"
    


class Astronomy(BaseModel):
    """Sun and moon data for one city and one point in time.

    ``city`` and ``date`` default to empty strings; ParseFile.parse_weather
    assigns them after the model has been parsed from the payload.
    """
    city: str = ""
    date: str = ""
    # The rise/set times are kept as the raw strings found in the payload.
    sunrise: str
    sunset: str
    moonrise: str
    moonset: str
    moon_phase: str
    # NOTE(review): presumably a percentage (0-100) — confirm against the API.
    moon_illumination: int
    
    
class Weather(BaseModel):
    """Daily weather summary for one city.

    ParseFile.parse_weather fills ``city`` with the parsed city name and
    ``date`` with the payload's ``time_zone`` local time before parsing.
    """
    city: str = ""
    date: str = ""  # overwritten by ParseFile.parse_weather with the local time
    # Temperatures come in Celsius/Fahrenheit pairs, as the field suffixes indicate.
    maxtempC: float
    maxtempF: float
    mintempC: float
    mintempF: float
    avgtempC: float
    avgtempF: float
    totalSnow_cm: float
    sunHour: float
    uvIndex: float
    

In [11]:
from typing import Optional
class ParseFile:
    """Turn one weather-API JSON payload into City, Weather and Astronomy records.

    The payload must hold a ``data`` mapping with ``time_zone``,
    ``nearest_area`` and ``weather`` lists. Only the first entry of each list
    is read, and likewise only the first ``hourly`` and ``astronomy`` entry of
    the weather record.
    """

    def __init__(self, json_handler: Optional[dict]):
        """Store the payload's ``data`` section and its first time-zone entry.

        Parameters
        ----------
        json_handler :
            Either the decoded payload (a dict, or any dict subclass) or a
            zero-argument callable returning it (for example ``readfile``).

        Raises
        ------
        TypeError
            If ``json_handler`` is neither a dict nor callable.
        """
        # isinstance (not an exact type comparison) so dict subclasses are
        # used as data instead of being called.
        if isinstance(json_handler, dict):
            json_file = json_handler
        elif callable(json_handler):
            json_file = json_handler()
        else:
            raise TypeError(
                "json_handler must be a dict or a callable returning a dict, "
                "got %s" % type(json_handler).__name__
            )

        self.json_data = json_file.get('data')
        self.time_zone = self.json_data['time_zone'][0]
        # Filled lazily by parse_area / parse_weather.
        self.city: Optional[str] = None
        self.weather: Optional[Weather] = None
        self.astronomy: Optional[Astronomy] = None
        self.hourly: Optional[dict] = None

    def parse_area(self) -> City:
        """Build a City from the first ``nearest_area`` entry.

        ``areaName`` is renamed to ``name``; values wrapped as
        ``[{"value": ...}]`` are unwrapped to the inner value. ``zone`` and
        ``utcOffset`` are taken from the time-zone entry.

        Side effect: stores the city name in ``self.city``.

        Returns
        -------
        City
        """
        area = self.json_data['nearest_area'][0]

        area_info = {}
        for key, value in area.items():
            if key == 'areaName':
                key = 'name'
            if isinstance(value, list):
                value = value[0]['value']
            area_info[key] = value

        self.city = area_info['name']
        area_info['zone'] = self.time_zone['zone']
        area_info['utcOffset'] = self.time_zone['utcOffset']
        return City.parse_obj(area_info)

    def parse_weather(self) -> tuple:
        """Parse the first weather record into (Weather, Astronomy, hourly dict).

        The result is computed once and cached on the instance. The payload
        itself is left untouched: the weather and hourly records are copied
        before keys are removed or added. ``city`` is set to the parsed city
        name, and the ``date`` (``time`` for the hourly dict) to the time-zone
        entry's ``localtime``.

        Returns
        -------
        tuple
            ``(Weather, Astronomy, dict)``; the dict is the first hourly entry
            plus ``city`` and ``time`` keys.
        """
        if self.weather is None or self.astronomy is None:
            # The city name is only known after parse_area has run; resolve it
            # here so the two methods can be called in any order.
            if self.city is None:
                self.parse_area()

            local_time = self.time_zone['localtime']

            # Shallow copy: the pops below must not alter the caller's payload.
            weather_dict = dict(self.json_data['weather'][0])

            hourly = dict(weather_dict.pop('hourly')[0])
            hourly['city'] = self.city
            hourly['time'] = local_time

            astronomy = Astronomy.parse_obj(weather_dict.pop('astronomy')[0])
            astronomy.city = self.city
            astronomy.date = local_time

            # The record's own date is replaced by the time-zone local time.
            weather_dict['city'] = self.city
            weather_dict['date'] = local_time
            weather = Weather.parse_obj(weather_dict)

            # Publish only after everything parsed, so a failure leaves no
            # half-filled cache behind.
            self.weather = weather
            self.astronomy = astronomy
            self.hourly = hourly

        return self.weather, self.astronomy, self.hourly

                
            
            
            

In [12]:

from datetime import datetime
import os


def to_csv(filename: str, data):
    """Write ``data`` as a one-row CSV (header plus row) in an hourly folder.

    The file is created under ``./weather<YYYY-MM-DD-HH>`` (current local
    time); the folder is created when missing and an existing file of the
    same name is replaced.

    Parameters
    ----------
    filename : str
        Name of the CSV file inside the time-stamped folder.
    data : mapping
        Column name -> value; the keys become the header row.
    """
    # %H (hour, 00-23). The lowercase %h is a non-portable directive that, where
    # it is supported at all, expands to the abbreviated month name.
    dirname = "./weather" + datetime.now().strftime("%Y-%m-%d-%H")
    os.makedirs(dirname, exist_ok=True)

    filepath = os.path.join(dirname, filename)

    # Mode 'w' truncates any previous file; newline='' lets the csv module
    # control line endings (avoids blank rows on Windows).
    with open(filepath, 'w', newline='') as fp:
        writer = csv.DictWriter(fp, fieldnames=list(data))
        writer.writeheader()
        writer.writerow(data)

In [29]:
from urllib import response
import boto3
from io import StringIO
import os
import pandas as pd


def create_filestreams(data):
    """Create the in-memory CSV streams for city, weather, astronomy and hourly.

    Rebinds three module-level names:
      * ``files``   - the list of stream names,
      * ``streams`` - name -> ``StringIO`` buffer,
      * ``writers`` - name -> ``csv.DictWriter`` bound to that buffer.

    ``data[name]`` supplies the field names of each writer, and the header row
    is written immediately. Also sets ``create_filestreams.has_been_called``
    to ``True``.
    """
    global streams, writers, files

    create_filestreams.has_been_called = True

    streams, writers = {}, {}
    files = ['city', 'weather', 'astronomy', 'hourly']

    for name in files:
        buffer = streams[name] = StringIO()
        writer = writers[name] = csv.DictWriter(buffer, data[name])
        writer.writeheader()

def load_file(data):
    """Append one row per stream: ``data[name]`` goes to the writer called ``name``.

    Reads the module-level ``files`` and ``writers`` set up by
    ``create_filestreams``.
    """
    for name in files:
        sink = writers[name]
        sink.writerow(data[name])


    

def upload_files(bucket="weather-ng"):
    """Upload every in-memory CSV stream to S3 as ``<name>.csv``.

    Reads the module-level ``files`` and ``streams`` set up by
    ``create_filestreams``.

    Parameters
    ----------
    bucket : str
        Name of the destination S3 bucket.
    """
    # One resource handle is enough for all uploads; no need to rebuild it per file.
    s3_resource = boto3.resource('s3')

    for filename in files:
        body = streams[filename].getvalue()
        s3_resource.Object(bucket, filename + '.csv').put(Body=body)
            

In [30]:
from data_api import gen_state

def process(upload=False):
    """Run the extract -> transform -> load pipeline for every listed capital.

    Steps:
      1. Extract: read the ``Capital`` column of ``list_of_capitals.csv``
         (rows with missing values dropped) and pull one JSON payload per
         item from ``gen_state``.
      2. Transform: parse each payload with ``ParseFile`` into city, weather,
         astronomy and hourly dicts.
      3. Load: append them to the in-memory CSV streams (created on the first
         payload), then upload the streams with ``upload_files``.

    Parameters
    ----------
    upload : bool
        NOTE(review): accepted but currently ignored - the upload always runs
        once at least one payload was processed. Kept that way so existing
        ``process()`` calls behave as before; confirm the intended meaning.
    """
    state = pd.read_csv("list_of_capitals.csv").dropna()
    capitals = state['Capital'].values

    create_filestreams.has_been_called = False

    # Iterate directly: only exhaustion of the generator ends the loop, so a
    # stray StopIteration raised while parsing is no longer mistaken for "done".
    for jsondata in gen_state(capitals):
        # Transform
        parser = ParseFile(json_handler=jsondata)
        city = parser.parse_area()
        weather, astronomy, hourly = parser.parse_weather()

        data = {
            'city': city.dict(),
            'weather': weather.dict(),
            'astronomy': astronomy.dict(),
            'hourly': hourly,
        }

        # Load: the first payload defines the CSV columns.
        if not create_filestreams.has_been_called:
            create_filestreams(data=data)

        load_file(data=data)

    # Without any payload no streams were created in this run; uploading would
    # either fail or push leftovers from an earlier run.
    if not create_filestreams.has_been_called:
        print("No weather data received; nothing to upload.")
        return

    print("Loading files completed!\nUploading file to aws s3 bucket...")
    upload_files()
    print('Uploaded')



In [31]:
process()

Loading files completed!
Uploading file to aws s3 bucket...
Uploaded


In [70]:

# Scratch cell: rebuild the list of capitals and keep a payload iterator (`it`)
# so that later cells can pull one payload at a time with next(it).
state = pd.read_csv("list_of_capitals.csv").dropna()
capitals = state['Capital'].values

it = iter(gen_state(capitals))

        #weather, astronomy, hourly  = parser.parse_weather()

In [75]:
# Scratch cell: write a single city into an in-memory CSV buffer.
# Each run consumes one payload from `it`, so re-running moves on to the next one.
csv_buffer = StringIO()
data = next(it)
parser = ParseFile(json_handler=data)

city = parser.parse_area()
# The keys of city.dict() serve as the CSV field names.
writer = csv.DictWriter(csv_buffer, city.dict())

writer.writeheader()

In [80]:



# Append the city row and snapshot the buffer's full CSV text.
# Re-running this cell appends the same row again (the buffer is not reset here).
writer.writerow(city.dict())
file = csv_buffer.getvalue()

In [3]:
from io import StringIO, BytesIO
import boto3
import pandas as pd
from process import get_session

In [45]:
import shutil
# Delete the scratch directory tree. With ignore_errors=False a missing
# ./testdir raises, so this cell fails when it is run a second time.
shutil.rmtree('./testdir', ignore_errors=False, onerror=None)

In [45]:
# Read one uploaded CSV back from S3 into a DataFrame.
BucketName = "weather-ng"
file = "weather.csv"
# get_session is imported from the project's `process` module.
session = get_session()
s3 = session.client('s3')

# The whole object body is read into memory and wrapped in BytesIO so that
# pandas can parse it like a file.
file_obj = s3.get_object(Bucket=BucketName, Key=file)
file_stream = BytesIO(file_obj['Body'].read())
weather_file = pd.read_csv(file_stream)

In [2]:
def download_file(bucket="weather-ng"):
    """Download every object of an S3 bucket and parse each one as CSV.

    A two-row preview of each file is printed, as before; the parsed frames
    are additionally returned so callers can work with them.

    Parameters
    ----------
    bucket : str
        Name of the S3 bucket to read.

    Returns
    -------
    dict
        Object key -> ``pandas.DataFrame``; empty when the bucket holds no
        objects.

    Notes
    -----
    A single ``list_objects`` call is made, so only the first page of keys is
    processed (S3 returns at most 1000 keys per call).
    """
    session = get_session()
    s3 = session.client('s3')

    frames = {}
    # 'Contents' is absent from the response when the bucket is empty.
    for entry in s3.list_objects(Bucket=bucket).get('Contents', []):
        key = entry['Key']
        body = s3.get_object(Bucket=bucket, Key=key)['Body'].read()
        frame = pd.read_csv(BytesIO(body))
        print(frame.head(2))
        frames[key] = frame

    return frames
    

In [7]:
from process import download_file

In [8]:
# NOTE(review): this rebinds the notebook-level name `files`, which load_file and
# upload_files read as the list of stream names - confirm that reuse is intended.
files = download_file()

In [10]:
def transform(files):

IndentationError: expected an indented block (653364305.py, line 1)

In [30]:
data['date']

0     2022-09-05 00:24
1     2022-09-05 00:24
2     2022-09-05 00:24
3     2022-09-05 00:24
4     2022-09-05 00:24
5     2022-09-05 00:24
6     2022-09-05 00:24
7     2022-09-05 00:24
8     2022-09-05 00:24
9     2022-09-05 00:24
10    2022-09-05 00:24
11    2022-09-05 00:24
12    2022-09-05 00:24
13    2022-09-05 00:24
14    2022-09-05 00:24
15    2022-09-05 00:24
16    2022-09-05 00:24
17    2022-09-05 00:24
18    2022-09-05 00:24
19    2022-09-05 00:24
20    2022-09-05 00:24
21    2022-09-05 00:24
22    2022-09-05 00:24
23    2022-09-05 00:24
24    2022-09-05 00:24
25    2022-09-05 00:24
26    2022-09-05 00:24
27    2022-09-05 00:24
28    2022-09-05 00:24
29    2022-09-05 00:24
30    2022-09-05 00:24
31    2022-09-05 00:24
32    2022-09-05 00:24
33    2022-09-05 00:24
34    2022-09-05 00:24
35    2022-09-05 00:24
36    2022-09-05 00:24
37    2022-09-05 00:24
Name: date, dtype: object