# Handling missing values when empty entries are encoded as sentinel numbers (e.g. 99999) or placeholder strings (e.g. "No Event")

In [1]:
import pandas as pd

In [2]:
# Load the raw weather data. In this file, missing readings show up as the
# sentinel numbers -99999, 88888 and 99999 rather than as blanks.
df_weather = pd.read_csv("./weather_info.csv")

# Show the frame as loaded, sentinels included.
df_weather

Unnamed: 0,Day,Temperature,Windspeed,Condition
0,01-01-2024,33,8.2,Rain
1,01-02-2024,-99999,6.0,No Event
2,01-03-2024,28,88888.0,No Event
3,01-04-2024,38,88888.0,Sunny
4,01-05-2024,99999,88888.0,Sunny
5,01-06-2024,88888,3.5,No Event
6,01-07-2024,25,9.1,No Event
7,01-08-2024,-99999,-99999.0,No Event
8,01-09-2024,99999,99999.0,Cloudy
9,01-10-2024,99999,3.6,Cloudy


In [3]:
# Summary statistics before cleaning: the sentinel values dominate mean, std, min and max.
df_weather.describe()

Unnamed: 0,Temperature,Windspeed
count,12.0,12.0
mean,15754.583333,22225.225
std,70660.261887,58601.351495
min,-99999.0,-99999.0
25%,22.5,3.575
50%,30.5,7.1
75%,91665.75,88888.0
max,99999.0,99999.0


In [5]:
# Distinct Temperature values, to spot the sentinel codes.
df_weather["Temperature"].unique()

array([    33, -99999,     28,     38,  99999,  88888,     25,     21,
           23])

In [6]:
# Distinct Windspeed values; the same sentinel codes appear here as floats.
df_weather["Windspeed"].unique()

array([ 8.2000e+00,  6.0000e+00,  8.8888e+04,  3.5000e+00,  9.1000e+00,
       -9.9999e+04,  9.9999e+04,  3.6000e+00,  2.8000e+00,  5.5000e+00])

In [7]:
# How often each Windspeed value occurs, sentinels included.
df_weather["Windspeed"].value_counts()

Windspeed
 88888.0    3
 8.2        1
 6.0        1
 3.5        1
 9.1        1
-99999.0    1
 99999.0    1
 3.6        1
 2.8        1
 5.5        1
Name: count, dtype: int64

In [8]:
# How often each Temperature value occurs, sentinels included.
df_weather["Temperature"].value_counts()

Temperature
 99999    3
-99999    2
 33       1
 28       1
 38       1
 88888    1
 25       1
 21       1
 23       1
Name: count, dtype: int64

**Replace all the bad data with NaN so that calculations work properly**

In [10]:
# Swap every sentinel for pd.NA, modifying df_weather in place.
# Note: the info() output further down reports Temperature and Windspeed as
# object dtype after this call, and describe() stops giving numeric statistics.
df_weather.replace(to_replace=[99999,-99999, 88888], value=pd.NA, inplace=True)

In [11]:
# Inspect the frame after the in-place replacement.
df_weather

Unnamed: 0,Day,Temperature,Windspeed,Condition
0,01-01-2024,33.0,8.2,Rain
1,01-02-2024,,6.0,No Event
2,01-03-2024,28.0,,No Event
3,01-04-2024,38.0,,Sunny
4,01-05-2024,,,Sunny
5,01-06-2024,,3.5,No Event
6,01-07-2024,25.0,9.1,No Event
7,01-08-2024,,,No Event
8,01-09-2024,,,Cloudy
9,01-10-2024,,3.6,Cloudy


In [12]:
# describe() now reports count/unique/top/freq instead of mean/std/quantiles.
df_weather.describe()

Unnamed: 0,Day,Temperature,Windspeed,Condition
count,12,6,7.0,12
unique,12,6,7.0,4
top,01-01-2024,33,8.2,No Event
freq,1,1,1.0,5


In [13]:
# Check column dtypes and non-null counts after the pd.NA replacement.
df_weather.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Day          12 non-null     object
 1   Temperature  6 non-null      object
 2   Windspeed    7 non-null      object
 3   Condition    12 non-null     object
dtypes: object(4)
memory usage: 516.0+ bytes


In [15]:
# Type of the replaced Temperature entry in the row labelled 1.
type(df_weather.loc[1, "Temperature"])

pandas._libs.missing.NAType

In [17]:
# Re-read the CSV to restore the original sentinel values before trying np.nan.
df_weather = pd.read_csv("./weather_info.csv")


In [18]:
# Confirm the sentinel values are back.
df_weather

Unnamed: 0,Day,Temperature,Windspeed,Condition
0,01-01-2024,33,8.2,Rain
1,01-02-2024,-99999,6.0,No Event
2,01-03-2024,28,88888.0,No Event
3,01-04-2024,38,88888.0,Sunny
4,01-05-2024,99999,88888.0,Sunny
5,01-06-2024,88888,3.5,No Event
6,01-07-2024,25,9.1,No Event
7,01-08-2024,-99999,-99999.0,No Event
8,01-09-2024,99999,99999.0,Cloudy
9,01-10-2024,99999,3.6,Cloudy


In [19]:
import numpy as np

In [21]:
# Swap every sentinel for np.nan; np.nan is a float, so the affected columns
# are shown as floats in the next output.
# The name is rebound instead of using inplace=True: df_weather ends up the
# same, but the call stays chainable and does not mutate a frame that an
# earlier cell already displayed.
df_weather = df_weather.replace(to_replace=[-99999, 88888, 99999], value=np.nan)

In [22]:
# Inspect the result: the sentinels are now NaN.
df_weather

Unnamed: 0,Day,Temperature,Windspeed,Condition
0,01-01-2024,33.0,8.2,Rain
1,01-02-2024,,6.0,No Event
2,01-03-2024,28.0,,No Event
3,01-04-2024,38.0,,Sunny
4,01-05-2024,,,Sunny
5,01-06-2024,,3.5,No Event
6,01-07-2024,25.0,9.1,No Event
7,01-08-2024,,,No Event
8,01-09-2024,,,Cloudy
9,01-10-2024,,3.6,Cloudy


In [23]:
# Reload once more so the next examples start from the raw sentinel values.
df_weather = pd.read_csv("./weather_info.csv")


In [24]:
# Confirm the sentinel values are back.
df_weather

Unnamed: 0,Day,Temperature,Windspeed,Condition
0,01-01-2024,33,8.2,Rain
1,01-02-2024,-99999,6.0,No Event
2,01-03-2024,28,88888.0,No Event
3,01-04-2024,38,88888.0,Sunny
4,01-05-2024,99999,88888.0,Sunny
5,01-06-2024,88888,3.5,No Event
6,01-07-2024,25,9.1,No Event
7,01-08-2024,-99999,-99999.0,No Event
8,01-09-2024,99999,99999.0,Cloudy
9,01-10-2024,99999,3.6,Cloudy


In [25]:
# Column-specific replacement: each dict key is a column name and its value
# lists what to replace in that column; every match becomes np.nan.
# The result is only displayed (not assigned), so df_weather itself is unchanged.
sentinel_codes = [88888, -99999, 99999]
df_weather.replace(
    to_replace={
        "Temperature": sentinel_codes,
        "Windspeed": sentinel_codes,
        "Condition": "No Event",
    },
    value=np.nan,
)

Unnamed: 0,Day,Temperature,Windspeed,Condition
0,01-01-2024,33.0,8.2,Rain
1,01-02-2024,,6.0,
2,01-03-2024,28.0,,
3,01-04-2024,38.0,,Sunny
4,01-05-2024,,,Sunny
5,01-06-2024,,3.5,
6,01-07-2024,25.0,9.1,
7,01-08-2024,,,
8,01-09-2024,,,Cloudy
9,01-10-2024,,3.6,Cloudy


In [26]:
# The frame still holds the sentinels because the previous result was not assigned.
df_weather

Unnamed: 0,Day,Temperature,Windspeed,Condition
0,01-01-2024,33,8.2,Rain
1,01-02-2024,-99999,6.0,No Event
2,01-03-2024,28,88888.0,No Event
3,01-04-2024,38,88888.0,Sunny
4,01-05-2024,99999,88888.0,Sunny
5,01-06-2024,88888,3.5,No Event
6,01-07-2024,25,9.1,No Event
7,01-08-2024,-99999,-99999.0,No Event
8,01-09-2024,99999,99999.0,Cloudy
9,01-10-2024,99999,3.6,Cloudy


In [27]:
# Value-to-value mapping applied across the whole frame: each sentinel becomes
# np.nan and the "No Event" placeholder becomes "Not Found".
# The result is only displayed; df_weather is not reassigned.
value_map = dict.fromkeys([99999, 88888, -99999], np.nan)
value_map["No Event"] = "Not Found"
df_weather.replace(value_map)

Unnamed: 0,Day,Temperature,Windspeed,Condition
0,01-01-2024,33.0,8.2,Rain
1,01-02-2024,,6.0,Not Found
2,01-03-2024,28.0,,Not Found
3,01-04-2024,38.0,,Sunny
4,01-05-2024,,,Sunny
5,01-06-2024,,3.5,Not Found
6,01-07-2024,25.0,9.1,Not Found
7,01-08-2024,,,Not Found
8,01-09-2024,,,Cloudy
9,01-10-2024,,3.6,Cloudy


In [28]:
# Small example frame: one textual grade per student, aligned by position.
student_names = ["Shurti", "Rohan", "Pratik", "Ashish", "Abdul", "Rohini"]
grade_labels = ["excellent", "average", "poor", "good", "excellent", "poor"]
df_scores = pd.DataFrame({"name": student_names, "Grade": grade_labels})

df_scores

Unnamed: 0,name,Grade
0,Shurti,excellent
1,Rohan,average
2,Pratik,poor
3,Ashish,good
4,Abdul,excellent
5,Rohini,poor


In [29]:
# Map the textual grades to letter grades:
#   excellent -> A, good -> B, average -> C, poor -> D
# The outer "Grade" key restricts the replacement to that column, so a value in
# another column (e.g. a name) that happens to equal a grade word is left alone.
# The result is only displayed; df_scores is not reassigned.
df_scores.replace({
    "Grade": {
        "excellent": "A",
        "good": "B",
        "average": "C",
        "poor": "D",
    }
})

Unnamed: 0,name,Grade
0,Shurti,A
1,Rohan,C
2,Pratik,D
3,Ashish,B
4,Abdul,A
5,Rohini,D


In [30]:
# df_scores still holds the textual grades: the replace() result above was not assigned.
df_scores

Unnamed: 0,name,Grade
0,Shurti,excellent
1,Rohan,average
2,Pratik,poor
3,Ashish,good
4,Abdul,excellent
5,Rohini,poor


In [31]:
# List form of replace: each entry in `to_replace` is swapped for the entry at
# the same position in `value`.
df_scores.replace(
    to_replace=["excellent", "good", "average", "poor"],
    value=["A", "B", "C", "D"],
)

Unnamed: 0,name,Grade
0,Shurti,A
1,Rohan,C
2,Pratik,D
3,Ashish,B
4,Abdul,A
5,Rohini,D
