In [80]:
#Importing Libraries
import pandas as pd

In [81]:
#Loading the dataset
url = 'https://raw.githubusercontent.com/Mburu-Elvis/NairobiRentPrices/main/data/buy_rent_final.csv'
df = pd.read_csv(url,sep=",")

In [82]:
df.head()

Unnamed: 0.1,Unnamed: 0,City,Location,Bedrooms,Bathrooms,Parking,Price
0,0,Westlands,Spring Valley,4.0,,1.0,3000
1,1,Westlands,Westlands Area,2.0,2.0,1.0,150000
2,2,Westlands,Riverside,3.0,3.0,1.0,200000
3,3,Nairobi,Upper Hill,1.0,1.0,1.0,100000
4,4,Westlands,Brookside,4.0,3.0,1.0,120000


In [83]:
#delete the unnamed column
df = df.drop('Unnamed: 0', axis=1) 

In [84]:
df.head(3)

Unnamed: 0,City,Location,Bedrooms,Bathrooms,Parking,Price
0,Westlands,Spring Valley,4.0,,1.0,3000
1,Westlands,Westlands Area,2.0,2.0,1.0,150000
2,Westlands,Riverside,3.0,3.0,1.0,200000


### Handling Nulls

In [85]:
# Count the number of null values in each column
null_count = df.isnull().sum()
null_count

City           0
Location       1
Bedrooms       5
Bathrooms    176
Parking       42
Price          4
dtype: int64

##### Bedrooms

In [86]:
# Display cells with NaN values in column 'Bedrooms'
null_beds = df[df['Bedrooms'].isnull()]
null_beds

Unnamed: 0,City,Location,Bedrooms,Bathrooms,Parking,Price
59,Nairobi,Westlands,,,,
60,Nairobi,Kilimani,,,,
82,Nairobi,Kileleshwa,,,,
644,Nairobi,Lavington,,,,
1464,Nairobi,Kileleshwa,,1.0,1.0,60000.0


In [87]:
# Replace NaN in Bedrooms column with 0
df['Bedrooms'] = df['Bedrooms'].fillna(0)

In [88]:
df['Bedrooms'].isnull().sum()

0

In [89]:
df.isnull().sum()

City           0
Location       1
Bedrooms       0
Bathrooms    176
Parking       42
Price          4
dtype: int64

##### Bathrooms

In [90]:
# Display cells with NaN values in column 'Bathrooms'
null_baths = df[df['Bathrooms'].isnull()]
null_baths.head(5)

Unnamed: 0,City,Location,Bedrooms,Bathrooms,Parking,Price
0,Westlands,Spring Valley,4.0,,1.0,3000.0
59,Nairobi,Westlands,0.0,,,
60,Nairobi,Kilimani,0.0,,,
82,Nairobi,Kileleshwa,0.0,,,
105,Kiambu County,Ruaka,2.0,,1.0,40000.0


In [91]:
# Replace NaN in Bathrooms column with 0
df['Bathrooms'] = df['Bathrooms'].fillna(0)

In [92]:
df['Bathrooms'].isnull().sum()

0

##### Parking

In [93]:
# Replace NaN in Parking column with 0
df['Parking'] = df['Parking'].fillna(0)

In [94]:
#Rechecking for nulls in dataframe
df.isnull().sum()

City         0
Location     1
Bedrooms     0
Bathrooms    0
Parking      0
Price        4
dtype: int64

### Handling the price column

In [95]:
# Display null cells in the price column
null_price= df[df['Price'].isnull()]
null_price

Unnamed: 0,City,Location,Bedrooms,Bathrooms,Parking,Price
59,Nairobi,Westlands,0.0,0.0,0.0,
60,Nairobi,Kilimani,0.0,0.0,0.0,
82,Nairobi,Kileleshwa,0.0,0.0,0.0,
644,Nairobi,Lavington,0.0,0.0,0.0,


In [96]:
##Filling nulls
df['Price'] =df['Price'].fillna(0)

In [97]:
df.isnull().sum()

City         0
Location     1
Bedrooms     0
Bathrooms    0
Parking      0
Price        0
dtype: int64

The nulls relevant to the dataset have been handled

### Cleaning

In [98]:
display(df.dtypes)

City          object
Location      object
Bedrooms     float64
Bathrooms    float64
Parking      float64
Price         object
dtype: object

In [101]:
# Convert float dtypes to int
df['Bedrooms'] = df['Bedrooms'].astype(int)
df['Bathrooms'] =df['Bathrooms'].astype(int)
df['Parking'] =df['Parking'].astype(int)

In [102]:
display(df.dtypes)

City         object
Location     object
Bedrooms      int32
Bathrooms     int32
Parking       int32
Price        object
dtype: object

Float data types succesfully converted to integers

##### Handling the Price column

In [109]:
# Step 1: Clean the column (remove non-numeric characters)
df['Price'] = df['Price'].str.replace('[^0-9]', '', regex=True)


In [110]:

# Step 2: Fill the nulls
df['Price'] = df['Price'].fillna(0)

In [112]:
#Checking for the column data type
df['Price'].dtype

dtype('O')

In [122]:
# Resolving the IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer

# Get unique values in the Price column
unique_values = df['Price'].unique()
unique_values

array(['3000', '150000', '200000', '100000', '120000', '210000', '180000',
       '300000', '160000', '325000', '137000', '140000', '330000',
       '250000', '225000', '400000', '190000', '55000', '60000', '350000',
       '170000', '450000', '220000', '280000', '95000', '130000',
       '270000', '230000', 0, '3300', '35000', '2750', '110000', '145000',
       '85000', '33000', '50000', '75000', '90000', '289000', '40000',
       '135000', '125000', '80000', '16000', '290000', '70000', '12000',
       '4500', '320000', '28000', '650000', '115000', '65000', '240000',
       '440000', '78000', '165000', '185000', '105000', '175000', '23000',
       '15000', '260000', '340000', '375000', '247000', '25000', '6500',
       '38000', '164000', '6000', '324000', '89000', '370000', '27500',
       '245000', '42000', '45000', '195000', '297000', '639000', '5000',
       '119500', '305000', '10000', '30000', '331000', '215000', '8000',
       '82000', '7000', '321000', '551600', '13000', '56600

In [130]:
#Step 2: Check for Non-finite values in the price column and convert
for value in df['Price']:
    if value == "NA" or value == "Inf":
        print(f"Skipping non-finite value: {value}")
    else:
        try:
            converted_value_result = int(value)
            print(converted_value_result)
        except (ValueError, TypeError):
            print(f"Could not convert value to int: {value}")

3000
150000
200000
100000
120000
210000
180000
300000
200000
160000
325000
137000
140000
330000
140000
250000
150000
225000
120000
300000
200000
150000
400000
190000
200000
55000
150000
60000
200000
60000
350000
170000
300000
120000
120000
140000
450000
220000
450000
350000
280000
150000
170000
95000
150000
350000
170000
130000
400000
270000
350000
220000
200000
150000
180000
250000
150000
250000
230000
0
0
220000
3300
95000
180000
180000
180000
180000
55000
60000
35000
60000
55000
2750
55000
250000
100000
110000
200000
145000
130000
85000
0
280000
150000
170000
33000
140000
250000
50000
180000
200000
75000
250000
85000
200000
150000
200000
100000
100000
180000
90000
110000
289000
140000
40000
250000
130000
160000
300000
75000
90000
135000
125000
180000
250000
230000
80000
280000
200000
180000
75000
75000
16000
150000
130000
290000
400000
110000
55000
160000
300000
160000
250000
200000
70000
135000
12000
4500
80000
160000
250000
170000
320000
150000
28000
85000
180000
100000
95000
2800

**Code Explanation** <br>
The code above hanldles the non-finite values in the price column by attempting to convert its elements into integers. It also includes logic to skip values that are non-finite (e.g., "NA" and "Inf") and to handle cases where conversion to an integer is not possible.

In [133]:
#Convert column data type from object to int
df['Price'] =df['Parking'].astype(int)

In [134]:
display(df.dtypes)

City         object
Location     object
Bedrooms      int32
Bathrooms     int32
Parking       int32
Price         int32
dtype: object

Data is clean! Ready for merging and EDA :-)

### Creating a csv file of the clean BuyRent Data

In [135]:
# Save the clean data to a new CSV file
df.to_csv('Cleaned_BuyRent.csv', index=False)