In [51]:
import requests
import pandas as pd
import datetime
import pyarrow as pa
import pyarrow.parquet as pq

class WeatherDataProcessor:
    """Fetch hourly Open-Meteo forecast data and roll it up into per-day totals.

    The processor is a three-step pipeline: ``fetch_data`` downloads the raw
    JSON payload, ``process_data`` sums the hourly readings for each calendar
    day into a DataFrame, and ``save_to_parquet`` writes that DataFrame to disk.

    Args:
        latitude: Latitude of the location, interpolated into the request URL.
        longitude: Longitude of the location, interpolated into the request URL.
        past_days: Value sent as the API's ``past_days`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.
    """

    # Hourly variables requested from the API and aggregated by process_data.
    # Keeping them in one place guarantees the URL and the aggregation agree.
    HOURLY_FIELDS = ('temperature_2m', 'rain', 'showers', 'visibility')

    def __init__(self, latitude, longitude, past_days=61, timeout=30):
        self.latitude = latitude
        self.longitude = longitude
        self.past_days = past_days
        self.timeout = timeout
        self.api_url = (
            "https://api.open-meteo.com/v1/forecast"
            f"?latitude={self.latitude}&longitude={self.longitude}"
            f"&hourly={','.join(self.HOURLY_FIELDS)}"
            f"&past_days={self.past_days}"
        )

    def fetch_data(self):
        """Download the forecast payload from ``self.api_url``.

        Returns:
            The decoded JSON response, or ``None`` if the request failed, the
            server answered with an error status, or the body was not valid
            JSON. The error is printed rather than raised.
        """
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(self.api_url, timeout=self.timeout)
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers JSON decoding failures on requests versions
            # whose decode error does not derive from RequestException.
            print(f"Error fetching data: {e}")
            return None

    def process_data(self, data):
        """Aggregate hourly readings into one row of totals per calendar day.

        Args:
            data: Payload as returned by ``fetch_data``; must contain an
                ``'hourly'`` mapping with a ``'time'`` list of
                ``YYYY-MM-DDTHH:MM`` strings and one parallel list per entry
                in ``HOURLY_FIELDS``. ``None`` is passed straight through.

        Returns:
            A DataFrame indexed by ``datetime.date`` (index name ``'date'``)
            with one column per hourly field, or ``None`` when ``data`` is
            ``None``.

        NOTE(review): every field is *summed* over the day, including
        temperature and visibility, where a mean may have been intended.
        The summing is preserved here so existing output does not change.
        """
        if data is None:
            return None

        hourly = data['hourly']
        daily_data = {}

        for index, item in enumerate(hourly['time']):
            date = datetime.datetime.strptime(item, '%Y-%m-%dT%H:%M').date()
            if date not in daily_data:
                daily_data[date] = dict.fromkeys(self.HOURLY_FIELDS, 0)

            totals = daily_data[date]
            for field in self.HOURLY_FIELDS:
                value = hourly[field][index]
                # A None entry (JSON null) is a missing reading; skip it
                # instead of crashing on `int + None`.
                if value is not None:
                    totals[field] += value

        # Convert dictionary to DataFrame
        df = pd.DataFrame.from_dict(daily_data, orient='index')
        df.index.name = 'date'
        return df

    def save_to_parquet(self, dataframe, filename):
        """Write ``dataframe`` to ``filename`` in Parquet format.

        Failures are reported with a printed message instead of an exception,
        so a notebook run continues past a failed save.

        Args:
            dataframe: DataFrame to persist. ``None`` (e.g. after a failed
                fetch) is reported and nothing is written.
            filename: Destination path of the Parquet file.
        """
        if dataframe is None:
            print("Error saving data to Parquet: no data to save")
            return

        try:
            table = pa.Table.from_pandas(dataframe)
            pq.write_table(table, filename)
            print(f"Data saved to {filename}")
        except Exception as e:
            print(f"Error saving data to Parquet: {e}")

# Coordinates of the location to pull weather for
latitude, longitude = 51.5085, -0.1257

# Build the processor for that location (past_days left at its default)
processor = WeatherDataProcessor(latitude=latitude, longitude=longitude)

# Download the raw hourly payload, then aggregate it per day
data = processor.fetch_data()
df = processor.process_data(data)

# Persist the daily table as a Parquet file
processor.save_to_parquet(df, filename='weather_data.parquet')


Data saved to weather_data.parquet


In [56]:
import unittest
from unittest.mock import patch
import pandas as pd
from Weather_Data_ETL import WeatherDataProcessor 


class TestWeatherDataProcessor(unittest.TestCase):
    """Unit tests for the fetch, process and save steps of WeatherDataProcessor."""

    @patch('Weather_Data_ETL.requests.get')
    def test_fetch_data_success(self, mock_get):
        """fetch_data returns the decoded JSON payload of a successful request."""
        # Set up mock response
        mock_response = {
            'hourly': {
                'time': ['2023-11-26T00:00'],
                'temperature_2m': [20],
                'rain': [0],
                'showers': [0],
                'visibility': [10]
            }
        }
        mock_get.return_value.json.return_value = mock_response

        # Create instance of WeatherDataProcessor
        processor = WeatherDataProcessor(latitude=51.5085, longitude=-0.1257, past_days=1)

        # Fetch data
        data = processor.fetch_data()

        # Verify the payload is returned unchanged, not merely "something"
        self.assertEqual(data, mock_response)

        # Verify exactly one request went to the processor's own URL and that
        # the HTTP status was checked. Only the first positional argument is
        # inspected so extra keyword arguments (e.g. a timeout) are tolerated.
        mock_get.assert_called_once()
        self.assertEqual(mock_get.call_args[0][0], processor.api_url)
        mock_get.return_value.raise_for_status.assert_called_once()

    def test_process_data(self):
        """process_data sums hourly readings into one row per calendar day."""
        # Two readings on the first day and one on the second, so the test
        # exercises both same-day accumulation and the split between days.
        sample_data = {
            'hourly': {
                'time': ['2023-11-26T00:00', '2023-11-26T01:00', '2023-11-27T00:00'],
                'temperature_2m': [20, 15, 5],
                'rain': [0, 1, 2],
                'showers': [0, 3, 0],
                'visibility': [10, 20, 30]
            }
        }

        # Create instance of WeatherDataProcessor
        processor = WeatherDataProcessor(latitude=51.5085, longitude=-0.1257, past_days=1)

        # Process data
        df = processor.process_data(sample_data)

        # Verify shape and index
        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(len(df), 2)
        self.assertEqual(df.index.name, 'date')

        # Verify the per-day totals
        self.assertEqual(df['temperature_2m'].tolist(), [35, 5])
        self.assertEqual(df['rain'].tolist(), [1, 2])
        self.assertEqual(df['showers'].tolist(), [3, 0])
        self.assertEqual(df['visibility'].tolist(), [30, 30])

        # A failed fetch yields None, which must pass straight through
        self.assertIsNone(processor.process_data(None))

    def test_save_to_parquet(self):
        """save_to_parquet writes a Parquet file that can be read back."""
        import os
        import tempfile

        # Sample DataFrame for testing save_to_parquet method
        sample_df = pd.DataFrame({
            'date': ['2023-11-26'],
            'temperature_2m': [20],
            'rain': [0],
            'showers': [0],
            'visibility': [10]
        })

        # Create instance of WeatherDataProcessor
        processor = WeatherDataProcessor(latitude=51.5085, longitude=-0.1257, past_days=1)

        # Write into a throwaway directory: nothing in the working directory
        # is touched, and cleanup happens even if an assertion below fails.
        with tempfile.TemporaryDirectory() as tmp_dir:
            path = os.path.join(tmp_dir, 'test_weather_data.parquet')

            # Save data to Parquet file
            processor.save_to_parquet(sample_df, path)

            # Check if file exists
            self.assertTrue(os.path.exists(path))

            # Check the file actually holds the data that was saved
            restored = pd.read_parquet(path)
            self.assertEqual(list(restored.columns), list(sample_df.columns))
            self.assertEqual(restored['temperature_2m'].tolist(), [20])
            self.assertEqual(restored['visibility'].tolist(), [10])


# Run the suite only when executed directly. argv=[''] keeps unittest from
# parsing the host process's own command-line arguments, and exit=False stops
# it from calling sys.exit() once the tests finish.
if __name__ == '__main__':
    unittest.main(exit=False, argv=[''])


...
----------------------------------------------------------------------
Ran 3 tests in 0.008s

OK


Data saved to test_weather_data.parquet
