## Weather Web Scraper
### by Kola Ademola

In [12]:
#import necessary libraries

import pandas as pd
import numpy as np
from bs4 import BeautifulSoup as bs
import requests as rq
from datetime import datetime

In [3]:
# Read the country / capital lookup table that drives the scraper.
countries = pd.read_csv(filepath_or_buffer='west_africa.csv')

# Display the loaded table.
countries

Unnamed: 0,country,capital
0,Benin,Porto-Novo
1,Burkina-Faso,Ouagadougou
2,Cameroon,Yaounde
3,Cape-Verde,Praia
4,Gambia,Banjul
5,Ghana,Accra
6,Guinea,Conakry
7,Guinea-Bissau,Bissau
8,Liberia,Monrovia
9,Mali,Bamako


In [10]:
# Scrape the current weather description and temperature for each capital.
#
# Results are collected in three index-aligned lists (entry i of each list
# describes the same country). A country whose page cannot be fetched or does
# not contain the expected elements is skipped with a printed message, so one
# bad page neither aborts the run nor misaligns the lists.
weathers = []
temps = []
location = []

for country, capital in zip(countries['country'], countries['capital']):
    url = 'https://www.timeanddate.com/weather/{}/{}'.format(country, capital)

    try:
        # A timeout keeps a single unresponsive request from hanging the run.
        page = rq.get(url, timeout=30)
        # Turn HTTP error statuses (404, 5xx, ...) into exceptions instead of
        # parsing an error page as if it were weather data.
        page.raise_for_status()
    except rq.RequestException as err:
        print('Skipping {}: request failed ({})'.format(url, err))
        continue

    soup = bs(page.content, 'html.parser')

    # NOTE(review): these selectors take the first <div class="h2"> and the
    # first <p> on the page -- confirm they still match the site's markup.
    temp = soup.find('div', class_='h2')
    weather = soup.find('p')

    # find() returns None when no matching element exists; calling .text on
    # it would raise an AttributeError that does not say which URL failed.
    if temp is None or weather is None:
        print('Skipping {}: expected weather elements not found'.format(url))
        continue

    location.append(country)
    weathers.append(weather.text)
    temps.append(temp.text)

In [15]:
# Today's date as an ISO string (YYYY-MM-DD), used to stamp the scraped rows.
current_date = datetime.now().date()
formatted_date = current_date.isoformat()

# Assemble the scraped lists into columns; the single date string is
# repeated for every row when the frame is built.
data = {
    'Country': location,
    'Current_Weather': weathers,
    'Current_Temperature(c)': temps,
    'Date': formatted_date,
}

df = pd.DataFrame(data)
df

Unnamed: 0,Country,Current_Weather,Current_Temperature(c),Date
0,Benin,Thunderstorms. Passing clouds.,27 °C,2023-04-04
1,Burkina-Faso,Scattered clouds.,31 °C,2023-04-04
2,Cameroon,Passing clouds.,25 °C,2023-04-04
3,Cape-Verde,Passing clouds.,23 °C,2023-04-04
4,Gambia,Clear.,24 °C,2023-04-04
5,Ghana,Passing clouds.,27 °C,2023-04-04
6,Guinea,Passing clouds.,28 °C,2023-04-04
7,Guinea-Bissau,Clear.,23 °C,2023-04-04
8,Liberia,Passing clouds.,24 °C,2023-04-04
9,Mali,Clear.,25 °C,2023-04-04


* The temperature column will need to be formatted / cleaned to hold just the numeric value.

In [25]:
# Work on an independent copy so the raw scraped frame `df` stays untouched
# while the cleaning steps run.
weather_df = df.copy(deep=True)

# Display the working copy.
weather_df

Unnamed: 0,Country,Current_Weather,Current_Temperature(c),Date
0,Benin,Thunderstorms. Passing clouds.,27 °C,2023-04-04
1,Burkina-Faso,Scattered clouds.,31 °C,2023-04-04
2,Cameroon,Passing clouds.,25 °C,2023-04-04
3,Cape-Verde,Passing clouds.,23 °C,2023-04-04
4,Gambia,Clear.,24 °C,2023-04-04
5,Ghana,Passing clouds.,27 °C,2023-04-04
6,Guinea,Passing clouds.,28 °C,2023-04-04
7,Guinea-Bissau,Clear.,23 °C,2023-04-04
8,Liberia,Passing clouds.,24 °C,2023-04-04
9,Mali,Clear.,25 °C,2023-04-04


In [26]:
#removing the "degree Celsius" suffix from the temperature column

# Extract only the (optionally negative) integer part of each value.
# Casting to str first keeps this cell re-runnable: after the first run the
# column already holds integers, and the .str accessor raises on non-string
# data. A regex is used instead of a whitespace split so values without a
# space before the unit (e.g. "27°C") are still parsed.
temp = weather_df['Current_Temperature(c)'].astype(str).str.extract(r'(-?\d+)', expand = True)

#assign the temperatures to the main dataset
weather_df['Current_Temperature(c)'] = temp[0].astype(int)

#change the Date column to the right data type
weather_df['Date'] = pd.to_datetime(weather_df['Date'])

weather_df

Unnamed: 0,Country,Current_Weather,Current_Temperature(c),Date
0,Benin,Thunderstorms. Passing clouds.,27,2023-04-04
1,Burkina-Faso,Scattered clouds.,31,2023-04-04
2,Cameroon,Passing clouds.,25,2023-04-04
3,Cape-Verde,Passing clouds.,23,2023-04-04
4,Gambia,Clear.,24,2023-04-04
5,Ghana,Passing clouds.,27,2023-04-04
6,Guinea,Passing clouds.,28,2023-04-04
7,Guinea-Bissau,Clear.,23,2023-04-04
8,Liberia,Passing clouds.,24,2023-04-04
9,Mali,Clear.,25,2023-04-04


In [27]:
# Print the frame summary (index range, per-column non-null counts, dtypes)
# to confirm the dtype conversions applied to weather_df.
weather_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 4 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   Country                 16 non-null     object        
 1   Current_Weather         16 non-null     object        
 2   Current_Temperature(c)  16 non-null     int32         
 3   Date                    16 non-null     datetime64[ns]
dtypes: datetime64[ns](1), int32(1), object(2)
memory usage: 576.0+ bytes


In [28]:
# Persist the cleaned frame as CSV; index = False leaves the row index out
# of the file.
weather_df.to_csv(path_or_buf='west_african_weather.csv', index = False)

In [None]:

# Append the latest scrape to the previously saved weather history.

# create a new DataFrame from the weather data
# `formatted_date` (the scrape date string) is repeated for every row; the
# original referenced an undefined name `dates` here.
# NOTE(review): `temps` holds the raw scraped strings (e.g. "27 °C"), not the
# cleaned integers in weather_df, and these column names differ from the ones
# in weather_df -- confirm which schema existing_weather_data.csv uses.
new_weather_df = pd.DataFrame({'location': location, 'date': formatted_date, 'temperature': temps, 'weather': weathers})

# load the existing weather DataFrame
try:
    existing_weather_df = pd.read_csv('existing_weather_data.csv')
except FileNotFoundError:
    # First run: there is no history yet, so only the new rows are saved.
    existing_weather_df = None

# concatenate the new DataFrame with the existing DataFrame
frames = [frame for frame in (existing_weather_df, new_weather_df) if frame is not None]
combined_weather_df = pd.concat(frames, ignore_index=True)

# save the combined DataFrame to a CSV file
combined_weather_df.to_csv('combined_weather_data.csv', index=False)