In [25]:
import os
import pandas as pd

folder_paths = ['Dubai_weather', 'lahore_weather', 'Murree_weather']


dfs = []


for folder_path in folder_paths:
    location_name = os.path.basename(folder_path)  
    files = os.listdir(folder_path)  # Get all files in the folder
    location_dfs = []

    for file_name in files:
        if file_name.endswith('.txt'):
            file_path = os.path.join(folder_path, file_name)
            
           
            df = pd.read_csv(file_path)
            df.columns = df.columns.str.strip()
            df['Location'] = location_name
            #Change column name PKT or GST or PKST to Date and add another column that defines the timezone
            if 'PKT' in df.columns:
                df.rename(columns={'PKT': 'Date'}, inplace=True)
                df['Time_Zone'] = 'PKT'
            elif 'GST' in df.columns:
                df.rename(columns={'GST': 'Date'}, inplace=True)
                df['Time_Zone'] = 'GST'
            elif 'PKST' in df.columns:
                df.rename(columns={'PKST': 'Date'}, inplace=True)

            #Converting Precipationcm to mm and dropping the Precipitationcm column
            if 'PrecipitationCm' in df.columns:
                df['Precipitationmm'] = df['PrecipitationCm'] * 10
                df.drop(columns=['PrecipitationCm'], inplace=True)
            
            location_dfs.append(df)
    

    #Concatenating all the dataframes in the list
    location_df = pd.concat(location_dfs, ignore_index=True)
    dfs.append(location_df)


all_data = pd.concat(dfs, ignore_index=True)



print(all_data.head()) 


       Date  Max TemperatureC  Mean TemperatureC  Min TemperatureC  \
0  2004-8-1              23.0                NaN              22.0   
1  2004-8-2              22.0                NaN              22.0   
2  2004-8-3              23.0               21.0              21.0   
3  2004-8-4              22.0               22.0              21.0   
4  2004-8-5              21.0                NaN              20.0   

   Dew PointC  MeanDew PointC  Min DewpointC  Max Humidity  Mean Humidity  \
0        18.0            18.0           18.0          68.0           68.0   
1        20.0            20.0           20.0          84.0           84.0   
2        21.0            20.0           20.0          88.0           85.0   
3        20.0            19.0           18.0          88.0           78.0   
4        18.0            18.0           18.0          88.0           88.0   

   Min Humidity  ...  Min VisibilitykM  Max Wind SpeedKm/h  \
0          68.0  ...              10.0                

Preprocessing the Data

In [26]:
all_data.isnull().sum()

Date                              0
Max TemperatureC                764
Mean TemperatureC              1348
Min TemperatureC                764
Dew PointC                      764
MeanDew PointC                  764
Min DewpointC                   764
Max Humidity                    766
Mean Humidity                   766
Min Humidity                    766
Max Sea Level PressurehPa      9276
Mean Sea Level PressurehPa     9276
Min Sea Level PressurehPa      9276
Max VisibilityKm                759
Mean VisibilityKm               759
Min VisibilitykM                759
Max Wind SpeedKm/h              757
Mean Wind SpeedKm/h             757
Max Gust SpeedKm/h            14511
Precipitationmm                 754
CloudCover                     6229
Events                        11630
WindDirDegrees                  754
Location                          0
Time_Zone                      1302
dtype: int64

In [30]:
#Dropping list of Columns that are not required for the Report
columns_to_Drop = ['Max Sea Level PressurehPa','Mean Sea Level PressurehPa','Dew PointC','MeanDew PointC','Min DewpointC','Max Wind SpeedKm/h','Mean Wind SpeedKm/h' ,'Min Sea Level PressurehPa','Max VisibilityKm','Mean VisibilityKm','Min VisibilitykM','Max Gust SpeedKm/h','Events','WindDirDegrees','Precipitationmm', 'CloudCover']
cleaned_data = all_data.drop(columns=columns_to_Drop)
cleaned_data.head()

Unnamed: 0,Date,Max TemperatureC,Mean TemperatureC,Min TemperatureC,Max Humidity,Mean Humidity,Min Humidity,Location,Time_Zone
0,2004-8-1,23.0,,22.0,68.0,68.0,68.0,Dubai_weather,GST
1,2004-8-2,22.0,,22.0,84.0,84.0,84.0,Dubai_weather,GST
2,2004-8-3,23.0,21.0,21.0,88.0,85.0,82.0,Dubai_weather,GST
3,2004-8-4,22.0,22.0,21.0,88.0,78.0,68.0,Dubai_weather,GST
4,2004-8-5,21.0,,20.0,88.0,88.0,88.0,Dubai_weather,GST


In [31]:
#Impute Mean TemperatureC if Max TemperatureC and Min TemperatureC are given
cleaned_data.loc[cleaned_data['Max TemperatureC'].notna() & cleaned_data['Min TemperatureC'].notna(), 'Mean TemperatureC'] = (cleaned_data['Max TemperatureC'] + cleaned_data['Min TemperatureC']) / 2

In [32]:
cleaned_data.head()

Unnamed: 0,Date,Max TemperatureC,Mean TemperatureC,Min TemperatureC,Max Humidity,Mean Humidity,Min Humidity,Location,Time_Zone
0,2004-8-1,23.0,22.5,22.0,68.0,68.0,68.0,Dubai_weather,GST
1,2004-8-2,22.0,22.0,22.0,84.0,84.0,84.0,Dubai_weather,GST
2,2004-8-3,23.0,22.0,21.0,88.0,85.0,82.0,Dubai_weather,GST
3,2004-8-4,22.0,21.5,21.0,88.0,78.0,68.0,Dubai_weather,GST
4,2004-8-5,21.0,20.5,20.0,88.0,88.0,88.0,Dubai_weather,GST


In [33]:
cleaned_data.isnull().sum()

Date                    0
Max TemperatureC      764
Mean TemperatureC     764
Min TemperatureC      764
Max Humidity          766
Mean Humidity         766
Min Humidity          766
Location                0
Time_Zone            1302
dtype: int64

In [79]:
#Drop rows where Max TemperatureC and Min TemperatureC are missing and Max Humidity and Min Humidity are missing
cleaned_data = cleaned_data.dropna(subset=['Max TemperatureC', 'Min TemperatureC', 'Max Humidity', 'Min Humidity'])

In [80]:
cleaned_data.isnull().sum()

Date                    0
Max TemperatureC        0
Mean TemperatureC       0
Min TemperatureC        0
Max Humidity            0
Mean Humidity           0
Min Humidity            0
Location                0
Time_Zone            1278
dtype: int64

Analyzing the Given Files and Generating Reports

In [38]:
def get_month(date):
    """Function to Return the Name of the month"""
    year, month, day = date.strip().split('-')
    month_dict = {'1' : 'Jan', '2' : 'Feb', '3' : 'Mar', '4' : 'Apr', '5' : 'May', '6' : 'Jun', '7' : 'Jul', '8' : 'Aug', '9' : 'Sep', '10' : 'Oct', '11' : 'Nov', '12' : 'Dec'}
    return month_dict[month], day

Task 1

In [82]:
#1. For a given year display the highest temperature and day, lowest temperature and day, most humid day and humidity.

year = input("Enter the Year for which you want to check the weather report: ")
year_df = cleaned_data[cleaned_data['Date'].str.contains(year)]

Highest_Temp= int(year_df['Max TemperatureC'].max())
Highest_date = year_df[year_df['Max TemperatureC'] == Highest_Temp]['Date'].values[0]
Highest_month , Highest_day = get_month(Highest_date)

Lowest_Temp = int(year_df['Min TemperatureC'].min())
Lowest_date = year_df[year_df['Min TemperatureC'] == Lowest_Temp]['Date'].values[0]
Lowest_month , Lowest_day = get_month(Lowest_date)

Humid = int(year_df['Max Humidity'].max())
Humid_date = year_df[year_df['Max Humidity'] == Humid]['Date'].values[0]
Humid_month, Humid_day = get_month(Humid_date)


print('Highest Temperature ' + str(Highest_Temp) + 'C' + ' on ' + Highest_month + ' ' + Highest_day)
print('Lowest Temperature ' + str(Lowest_Temp) + 'C' + ' on ' + Lowest_month + ' ' + Lowest_day)
print('Most Humid day ' + str(Humid) + '%' + ' on ' + Humid_month + ' ' + Humid_day)


Highest Temperature 45C on May 9
Lowest Temperature 0C on Jan 6
Most Humid day 100% on Aug 9


Task 2

In [83]:
#2. For a given month display the average highest temperature, average lowest temperature, average humidity.
input_data = input("Enter the Year and Month Space Separated for which you want to check the weather report: ")
year, month = input_data.split()

month_df = cleaned_data[cleaned_data['Date'].str.contains(year + '-' + month)]
average_highest_temp = round(int(month_df['Max TemperatureC'].mean()), 0)
average_lowest_temp = round(int(month_df['Min TemperatureC'].mean()), 0)
average_humity = round(int(month_df['Max Humidity'].mean()), 0)

print('Average Highest Temperature ' + str(average_highest_temp) + 'C')
print('Average Lowest Temperature ' + str(average_lowest_temp) + 'C')
print('Average Humidity ' + str(average_humity) + '%')

Average Highest Temperature 41C
Average Lowest Temperature 26C
Average Humidity 54%


Task 3

In [88]:
#3. For a given month draw two horizontal bar charts on the console for the highest and lowest temperature on each day. Highest in red and lowest in blue

input_data = input("Enter the Year and Month Space Separated for which you want to check the weather report: ")
year, month = input_data.split()
month_dict = {'1' : 'Jan', '2' : 'Feb', '3' : 'Mar', '4' : 'Apr', '5' : 'May', '6' : 'Jun', '7' : 'Jul', '8' : 'Aug', '9' : 'Sep', '10' : 'Oct', '11' : 'Nov', '12' : 'Dec'}
month_df = cleaned_data[cleaned_data['Date'].str.contains(year + '-' + month)]

month_name = month_dict[month]
print(month_name, year)

RED = '\033[91m'
BLUE = '\033[94m'
prev_location = None
for index, row in month_df.iterrows():
    if row['Location'] != prev_location:
        prev_location = row['Location']
        print('Location:' , row['Location'])
        print('\n')
    max_temp = row['Max TemperatureC']
    min_temp = row['Min TemperatureC']
    if(max_temp != None and min_temp != None):
          month, day = get_month(row['Date'])
          print(day, end = '')
          for i in range(int(max_temp)):
               print(RED + '+', end = '')
          print('\033[0m', end = '')
          print(str(int(max_temp)) + 'C', end = '')
          print('\n')
          print(day, end = '')
          for i in range(int(min_temp)):
               print(BLUE + '-', end = '')
          print('\033[0m', end = '')
          print(str(int(min_temp)) + 'C', end = '')
          print('\n')


May 2004
Location: lahore_weather


1[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[0m26C

1[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[0m15C

2[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[0m30C

2[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[0m16C

3[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[0m33C

3[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[0m18C

4[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[9

(Bonus Task)Task 4

In [87]:
#4. BONUS TASK. For a given month draw one horizontal bar chart on the console for the highest and lowest temperature on each day. Highest in red and lowest in blue.
input_data = input("Enter the Year and Month Space Separated for which you want to check the weather report: ")
year, month = input_data.split()
month_dict = {'1' : 'Jan', '2' : 'Feb', '3' : 'Mar', '4' : 'Apr', '5' : 'May', '6' : 'Jun', '7' : 'Jul', '8' : 'Aug', '9' : 'Sep', '10' : 'Oct', '11' : 'Nov', '12' : 'Dec'}
month_df = cleaned_data[cleaned_data['Date'].str.contains(year + '-' + month)]

month_name = month_dict[month]
print(month_name, year)

RED = '\033[91m'
BLUE = '\033[94m'
prev_location = None
for index, row in month_df.iterrows():
    if row['Location'] != prev_location:
        prev_location = row['Location']
        print('Location:' , row['Location'])
        print('\n')
    
    max_temp = row['Max TemperatureC']
    min_temp = row['Min TemperatureC']
    month, day = get_month(row['Date'])
    print(day, end = '')
    for i in range(int(min_temp)):
          print(BLUE + '-', end = '')
    print('\033[0m', end = '')
    for i in range(int(max_temp)):
          print(RED + '+', end = '')
    print('\033[0m', end = '')
    print(str(int(min_temp)) + 'C', end = '')
    print(' - ', end = '')
    print(str(int(max_temp)) + 'C', end = '')
    print('\n')

Apr 2005
Location: Dubai_weather


1[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[0m[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[0m13C - 15C

3[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[0m[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[0m15C - 20C

4[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[0m[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[0m12C - 14C

5[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[0m[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[0m15C - 18C

6[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[94m-[0m[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91m+[91