In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random



These are perfect for learning Pandas deeply and preparing for **data analysis / ML**.

---

# ðŸŸ¢ BASIC LEVEL (1â€“10)

1. Read the CSV file into a Pandas DataFrame.
2. Display the first 10 rows of the dataset.
3. Display the last 5 rows of the dataset.
4. Print the total number of rows and columns.
5. Print all column names.
6. Check the data types of each column.
7. Show basic statistical summary of numerical columns.
8. Select only the columns `name`, `room_type`, and `price`.
9. Count how many listings are there in total.
10. Find how many unique `neighbourhood_group` values exist.

---

# ðŸŸ¡ MEDIUM LEVEL (11â€“20)

11. Check how many missing values exist in each column.
12. Drop all rows where `price` is missing.
13. Fill missing values in `reviews_per_month` with `0`.
14. Convert the `last_review` column into datetime format.
15. Find the average price of all listings.
16. Find the minimum and maximum price.
17. Count how many listings are there for each `room_type`.
18. Find the average price for each `neighbourhood_group`.
19. Filter listings where `price` is greater than 300.
20. Sort the dataset by `number_of_reviews` in descending order.

---

# ðŸ”µ HARD LEVEL (21â€“30)

21. Find the neighbourhood group with the highest average price.
22. Find the top 10 most reviewed listings.
23. Find listings where `minimum_nights` is greater than 30.
24. Find hosts who own more than 5 listings.
25. Create a new column `price_per_night_category`:

* `Low` if price < 100
* `Medium` if price between 100â€“300
* `High` if price > 300

26. Find the average availability (`availability_365`) for each room type.
27. Find listings that have **never been reviewed**.
28. Find the most common neighbourhood.
29. Calculate the percentage of missing values in each column.
30. Prepare a cleaned dataset by:

* Removing rows with missing `price`
* Filling missing `reviews_per_month` with median
* Keeping only listings with `availability_365 > 0`

---

## ðŸ§  Bonus Challenge (Optional â€“ ML Thinking)

* Encode `room_type` and `neighbourhood_group` using numerical values.
* Create a dataset suitable for predicting **price**.
* Select features and target column.



In [3]:
df=pd.read_csv("D:\ML\Math for ML\AB_NYC_2019.csv")
print(df.head().to_string())
# print(df.info)

     id                                              name  host_id    host_name neighbourhood_group neighbourhood  latitude  longitude        room_type  price  minimum_nights  number_of_reviews last_review  reviews_per_month  calculated_host_listings_count  availability_365
0  2539                Clean & quiet apt home by the park     2787         John            Brooklyn    Kensington  40.64749  -73.97237     Private room    149               1                  9  2018-10-19               0.21                               6               365
1  2595                             Skylit Midtown Castle     2845     Jennifer           Manhattan       Midtown  40.75362  -73.98377  Entire home/apt    225               1                 45  2019-05-21               0.38                               2               355
2  3647               THE VILLAGE OF HARLEM....NEW YORK !     4632    Elisabeth           Manhattan        Harlem  40.80902  -73.94190     Private room    150               3 

In [None]:
print("Total number of unique id:",df["id"].nunique())
print("Total number of host id:",df["host_id"].nunique())
print("Total number of  neighbourhood_group:",df["neighbourhood_group"].nunique())
print("Total number of neighbourhood:",df["neighbourhood"].nunique())
print("Total number of room type is:",df["room_type"].nunique())

In [None]:
print("Is there any missing title name?\n",df["name"].isnull())
df["name"].fillna("No Name")

In [24]:
df["last_review"] = pd.to_datetime(df["last_review"])
df

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,2539,Clean & quiet apt home by the park,2787,John,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,2018-10-19,0.21,6,365
1,2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,2019-05-21,0.38,2,355
2,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Elisabeth,Manhattan,Harlem,40.80902,-73.94190,Private room,150,3,0,NaT,,1,365
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,2019-07-05,4.64,1,194
4,5022,Entire Apt: Spacious Studio/Loft by central park,7192,Laura,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80,10,9,2018-11-19,0.10,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48890,36484665,Charming one bedroom - newly renovated rowhouse,8232441,Sabrina,Brooklyn,Bedford-Stuyvesant,40.67853,-73.94995,Private room,70,2,0,NaT,,2,9
48891,36485057,Affordable room in Bushwick/East Williamsburg,6570630,Marisol,Brooklyn,Bushwick,40.70184,-73.93317,Private room,40,4,0,NaT,,2,36
48892,36485431,Sunny Studio at Historical Neighborhood,23492952,Ilgar & Aysel,Manhattan,Harlem,40.81475,-73.94867,Entire home/apt,115,10,0,NaT,,1,27
48893,36485609,43rd St. Time Square-cozy single bed,30985759,Taz,Manhattan,Hell's Kitchen,40.75751,-73.99112,Shared room,55,1,0,NaT,,6,2


In [25]:
df["room_type"].value_counts()

room_type
Entire home/apt    25409
Private room       22326
Shared room         1160
Name: count, dtype: int64

In [27]:
print(df["neighbourhood_group"].value_counts())

df.groupby("neighbourhood_group")["price"].mean()


neighbourhood_group
Manhattan        21661
Brooklyn         20104
Queens            5666
Bronx             1091
Staten Island      373
Name: count, dtype: int64


neighbourhood_group
Bronx             87.496792
Brooklyn         124.383207
Manhattan        196.875814
Queens            99.517649
Staten Island    114.812332
Name: price, dtype: float64

In [36]:
df.groupby("neighbourhood_group")

for group,count in df.groupby("neighbourhood_group"):
    print(group)
    print(count["price"].mean(numeric_only=True).round(2))

Bronx
87.5
Brooklyn
124.38
Manhattan
196.88
Queens
99.52
Staten Island
114.81


In [37]:
df.sort_values("number_of_reviews", ascending=False)

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
11759,9145202,Room near JFK Queen Bed,47621202,Dona,Queens,Jamaica,40.66730,-73.76831,Private room,47,1,629,2019-07-05,14.58,2,333
2031,903972,Great Bedroom in Manhattan,4734398,Jj,Manhattan,Harlem,40.82085,-73.94025,Private room,49,1,607,2019-06-21,7.75,3,293
2030,903947,Beautiful Bedroom in Manhattan,4734398,Jj,Manhattan,Harlem,40.82124,-73.93838,Private room,49,1,597,2019-06-23,7.72,3,342
2015,891117,Private Bedroom in Manhattan,4734398,Jj,Manhattan,Harlem,40.82264,-73.94041,Private room,49,1,594,2019-06-15,7.57,3,339
13495,10101135,Room Near JFK Twin Beds,47621202,Dona,Queens,Jamaica,40.66939,-73.76975,Private room,47,1,576,2019-06-27,13.40,2,173
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48871,36475746,A LARGE ROOM - 1 MONTH MINIMUM - WASHER&DRYER,144008701,Ozzy Ciao,Manhattan,Harlem,40.82233,-73.94687,Private room,35,29,0,NaT,,2,31
48870,36474911,"Cozy, clean Williamsburg 1- bedroom apartment",1273444,Tanja,Brooklyn,Williamsburg,40.71197,-73.94946,Entire home/apt,99,4,0,NaT,,1,22
48869,36474023,"Cozy, Sunny Brooklyn Escape",1550580,Julia,Brooklyn,Bedford-Stuyvesant,40.68759,-73.95705,Private room,45,4,0,NaT,,1,7
48868,36473253,Heaven for you(only for guy),261338177,Diana,Brooklyn,Gravesend,40.59118,-73.97119,Shared room,25,7,0,NaT,,6,365


In [39]:
def price_category(price):
    if price < 100:
        return "Low"
    elif price <= 300:
        return "Medium"
    else:
        return "High"

df["price_category"] = df["price"].apply(price_category)
df

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,price_category
0,2539,Clean & quiet apt home by the park,2787,John,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,2018-10-19,0.21,6,365,Medium
1,2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,2019-05-21,0.38,2,355,Medium
2,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Elisabeth,Manhattan,Harlem,40.80902,-73.94190,Private room,150,3,0,NaT,,1,365,Medium
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,2019-07-05,4.64,1,194,Low
4,5022,Entire Apt: Spacious Studio/Loft by central park,7192,Laura,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80,10,9,2018-11-19,0.10,1,0,Low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48890,36484665,Charming one bedroom - newly renovated rowhouse,8232441,Sabrina,Brooklyn,Bedford-Stuyvesant,40.67853,-73.94995,Private room,70,2,0,NaT,,2,9,Low
48891,36485057,Affordable room in Bushwick/East Williamsburg,6570630,Marisol,Brooklyn,Bushwick,40.70184,-73.93317,Private room,40,4,0,NaT,,2,36,Low
48892,36485431,Sunny Studio at Historical Neighborhood,23492952,Ilgar & Aysel,Manhattan,Harlem,40.81475,-73.94867,Entire home/apt,115,10,0,NaT,,1,27,Medium
48893,36485609,43rd St. Time Square-cozy single bed,30985759,Taz,Manhattan,Hell's Kitchen,40.75751,-73.99112,Shared room,55,1,0,NaT,,6,2,Low


In [None]:
df["host_name"].value_counts()
host_names=df["host_name"].values

In [17]:
miss_value=(df.isna().mean()*100).round(2)
miss_value

id                                 0.00
name                               0.03
host_id                            0.00
host_name                          0.04
neighbourhood_group                0.00
neighbourhood                      0.00
latitude                           0.00
longitude                          0.00
room_type                          0.00
price                              0.00
minimum_nights                     0.00
number_of_reviews                  0.00
last_review                       20.56
reviews_per_month                 20.56
calculated_host_listings_count     0.00
availability_365                   0.00
dtype: float64

In [29]:
df["neighbourhood"].value_counts().idxmax()

'Williamsburg'

In [40]:

df_clean = df.dropna(subset=["price"])
df_clean["reviews_per_month"].fillna(df_clean["reviews_per_month"].median())
df_clean = df_clean[df_clean["availability_365"] > 0]
df_clean

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,2539,Clean & quiet apt home by the park,2787,John,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,2018-10-19,0.21,6,365
1,2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,2019-05-21,0.38,2,355
2,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Elisabeth,Manhattan,Harlem,40.80902,-73.94190,Private room,150,3,0,,,1,365
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,2019-07-05,4.64,1,194
5,5099,Large Cozy 1 BR Apartment In Midtown East,7322,Chris,Manhattan,Murray Hill,40.74767,-73.97500,Entire home/apt,200,3,74,2019-06-22,0.59,1,129
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48890,36484665,Charming one bedroom - newly renovated rowhouse,8232441,Sabrina,Brooklyn,Bedford-Stuyvesant,40.67853,-73.94995,Private room,70,2,0,,,2,9
48891,36485057,Affordable room in Bushwick/East Williamsburg,6570630,Marisol,Brooklyn,Bushwick,40.70184,-73.93317,Private room,40,4,0,,,2,36
48892,36485431,Sunny Studio at Historical Neighborhood,23492952,Ilgar & Aysel,Manhattan,Harlem,40.81475,-73.94867,Entire home/apt,115,10,0,,,1,27
48893,36485609,43rd St. Time Square-cozy single bed,30985759,Taz,Manhattan,Hell's Kitchen,40.75751,-73.99112,Shared room,55,1,0,,,6,2
