In [1]:
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
series = pd.Series(["BMW", "Toyota", "Honda"])
series

0       BMW
1    Toyota
2     Honda
dtype: object

In [3]:
colors = pd.Series(["red", "blue", "white"])
colors

0      red
1     blue
2    white
dtype: object

In [4]:
car_data = pd.DataFrame({"Car Make": series, "Colors": colors})

In [5]:
car_data

Unnamed: 0,Car Make,Colors
0,BMW,red
1,Toyota,blue
2,Honda,white


In [6]:
car_sales = pd.read_csv("car-sales.csv")
car_sales

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


In [7]:
car_sales.to_csv("exp_car_sales.csv", index=False)

# Describing Data

In [8]:
car_sales.dtypes

Make             object
Colour           object
Odometer (KM)     int64
Doors             int64
Price            object
dtype: object

In [9]:
car_columns = car_sales.columns
car_columns

Index(['Make', 'Colour', 'Odometer (KM)', 'Doors', 'Price'], dtype='object')

In [10]:
car_sales.index

RangeIndex(start=0, stop=10, step=1)

In [11]:
car_sales.describe()

Unnamed: 0,Odometer (KM),Doors
count,10.0,10.0
mean,78601.4,4.0
std,61983.471735,0.471405
min,11179.0,3.0
25%,35836.25,4.0
50%,57369.0,4.0
75%,96384.5,4.0
max,213095.0,5.0


In [12]:
car_data

Unnamed: 0,Car Make,Colors
0,BMW,red
1,Toyota,blue
2,Honda,white


In [13]:
car_sales.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Make           10 non-null     object
 1   Colour         10 non-null     object
 2   Odometer (KM)  10 non-null     int64 
 3   Doors          10 non-null     int64 
 4   Price          10 non-null     object
dtypes: int64(2), object(3)
memory usage: 532.0+ bytes


In [14]:
car_sales.describe()

Unnamed: 0,Odometer (KM),Doors
count,10.0,10.0
mean,78601.4,4.0
std,61983.471735,0.471405
min,11179.0,3.0
25%,35836.25,4.0
50%,57369.0,4.0
75%,96384.5,4.0
max,213095.0,5.0


In [15]:
car_prices = pd.Series([3_000, 5_000, 10_000])
car_prices.mean()

6000.0

In [16]:
car_sales["Odometer (KM)"].mean()

78601.4

In [17]:
len(car_sales)

10

# Viewing and Selecting data

In [18]:
car_sales.head(7)

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"


loc and iloc

In [19]:
animals = pd.Series(["Cat", "Dog", "Panda"], index=[1, 1, 5])
animals
animals.iloc[1]

'Dog'

In [20]:
car_sales.iloc[:3]

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"


In [21]:
car_sales[car_sales.Make == "Toyota"]

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
5,Toyota,Green,99213,4,"$4,500.00"
8,Toyota,White,60000,4,"$6,250.00"


In [22]:
pd.crosstab(car_sales["Make"], car_sales["Doors"])

Doors,3,4,5
Make,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BMW,0,0,1
Honda,0,3,0
Nissan,0,2,0
Toyota,1,3,0


Group-by

In [23]:
car_sales.groupby(["Doors"]).mean()
# print(car_sales.dtypes)

TypeError: agg function failed [how->mean,dtype->object]

In [None]:
car_sales["Odometer (KM)"].plot()

In [None]:
car_sales["Odometer (KM)"].hist()

In [None]:
car_sales["Price"] = car_sales["Price"].str.replace(',', '').astype(int)

In [None]:
car_sales.Price.plot()

In [None]:
car_sales.dropna(inplace=True)

In [None]:
missingCar = pd.read_csv("car-sales-missing-data.csv")

In [None]:
missingCar.dropna(inplace=True)

In [None]:
missingCar.to_csv("car-sales-missing-droped.csv")

In [None]:
seats_column = pd.Series([5] * 5)
car_sales["Seats"] = seats_column
car_sales

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price,Seats
0,Toyota,White,150043,4,"$4,000.00",5.0
1,Honda,Red,87899,4,"$5,000.00",5.0
2,Toyota,Blue,32549,3,"$7,000.00",5.0
3,BMW,Black,11179,5,"$22,000.00",5.0
4,Nissan,White,213095,4,"$3,500.00",5.0
5,Toyota,Green,99213,4,"$4,500.00",
6,Honda,Blue,45698,4,"$7,500.00",
7,Honda,Blue,54738,4,"$7,000.00",
8,Toyota,White,60000,4,"$6,250.00",
9,Nissan,White,31600,4,"$9,700.00",


In [None]:
car_sales["Seats"].fillna(5, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  car_sales["Seats"].fillna(5, inplace=True)


In [None]:
car_sales

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price,Seats
0,Toyota,White,150043,4,"$4,000.00",5.0
1,Honda,Red,87899,4,"$5,000.00",5.0
2,Toyota,Blue,32549,3,"$7,000.00",5.0
3,BMW,Black,11179,5,"$22,000.00",5.0
4,Nissan,White,213095,4,"$3,500.00",5.0
5,Toyota,Green,99213,4,"$4,500.00",5.0
6,Honda,Blue,45698,4,"$7,500.00",5.0
7,Honda,Blue,54738,4,"$7,000.00",5.0
8,Toyota,White,60000,4,"$6,250.00",5.0
9,Nissan,White,31600,4,"$9,700.00",5.0


In [None]:
car_sales[car_sales["Colour"].str.lower() == "white"]

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price,Seats
0,Toyota,White,150043,4,"$4,000.00",5.0
4,Nissan,White,213095,4,"$3,500.00",5.0
8,Toyota,White,60000,4,"$6,250.00",5.0
9,Nissan,White,31600,4,"$9,700.00",5.0


In [None]:
# Column from list
fuel_economy = [6] * len(car_sales)
car_sales["Fuel per 100KM"] = fuel_economy
car_sales

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price,Seats,Fuel per 100KM
0,Toyota,White,150043,4,"$4,000.00",5.0,6
1,Honda,Red,87899,4,"$5,000.00",5.0,6
2,Toyota,Blue,32549,3,"$7,000.00",5.0,6
3,BMW,Black,11179,5,"$22,000.00",5.0,6
4,Nissan,White,213095,4,"$3,500.00",5.0,6
5,Toyota,Green,99213,4,"$4,500.00",5.0,6
6,Honda,Blue,45698,4,"$7,500.00",5.0,6
7,Honda,Blue,54738,4,"$7,000.00",5.0,6
8,Toyota,White,60000,4,"$6,250.00",5.0,6
9,Nissan,White,31600,4,"$9,700.00",5.0,6


In [None]:
car_sales["Total Fuel Used in L"] = car_sales["Odometer (KM)"] / 100 * car_sales["Fuel per 100KM"]

In [None]:
car_sales

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price,Seats,Fuel per 100KM,Total Fuel Used,Total Fuel Used in L
0,Toyota,White,150043,4,"$4,000.00",5.0,6,9002.58,9002.58
1,Honda,Red,87899,4,"$5,000.00",5.0,6,5273.94,5273.94
2,Toyota,Blue,32549,3,"$7,000.00",5.0,6,1952.94,1952.94
3,BMW,Black,11179,5,"$22,000.00",5.0,6,670.74,670.74
4,Nissan,White,213095,4,"$3,500.00",5.0,6,12785.7,12785.7
5,Toyota,Green,99213,4,"$4,500.00",5.0,6,5952.78,5952.78
6,Honda,Blue,45698,4,"$7,500.00",5.0,6,2741.88,2741.88
7,Honda,Blue,54738,4,"$7,000.00",5.0,6,3284.28,3284.28
8,Toyota,White,60000,4,"$6,250.00",5.0,6,3600.0,3600.0
9,Nissan,White,31600,4,"$9,700.00",5.0,6,1896.0,1896.0


In [None]:
car_sales["Number of Wheels"] = 4
car_sales

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price,Seats,Fuel per 100KM,Total Fuel Used,Total Fuel Used in L,Number of Wheels
0,Toyota,White,150043,4,"$4,000.00",5.0,6,9002.58,9002.58,4
1,Honda,Red,87899,4,"$5,000.00",5.0,6,5273.94,5273.94,4
2,Toyota,Blue,32549,3,"$7,000.00",5.0,6,1952.94,1952.94,4
3,BMW,Black,11179,5,"$22,000.00",5.0,6,670.74,670.74,4
4,Nissan,White,213095,4,"$3,500.00",5.0,6,12785.7,12785.7,4
5,Toyota,Green,99213,4,"$4,500.00",5.0,6,5952.78,5952.78,4
6,Honda,Blue,45698,4,"$7,500.00",5.0,6,2741.88,2741.88,4
7,Honda,Blue,54738,4,"$7,000.00",5.0,6,3284.28,3284.28,4
8,Toyota,White,60000,4,"$6,250.00",5.0,6,3600.0,3600.0,4
9,Nissan,White,31600,4,"$9,700.00",5.0,6,1896.0,1896.0,4


In [None]:
car_sales["Passed road Safety"] = True

In [None]:
car_sales

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price,Seats,Fuel per 100KM,Total Fuel Used,Total Fuel Used in L,Number of Wheels,Passed road Safety
0,Toyota,White,150043,4,"$4,000.00",5.0,6,9002.58,9002.58,4,True
1,Honda,Red,87899,4,"$5,000.00",5.0,6,5273.94,5273.94,4,True
2,Toyota,Blue,32549,3,"$7,000.00",5.0,6,1952.94,1952.94,4,True
3,BMW,Black,11179,5,"$22,000.00",5.0,6,670.74,670.74,4,True
4,Nissan,White,213095,4,"$3,500.00",5.0,6,12785.7,12785.7,4,True
5,Toyota,Green,99213,4,"$4,500.00",5.0,6,5952.78,5952.78,4,True
6,Honda,Blue,45698,4,"$7,500.00",5.0,6,2741.88,2741.88,4,True
7,Honda,Blue,54738,4,"$7,000.00",5.0,6,3284.28,3284.28,4,True
8,Toyota,White,60000,4,"$6,250.00",5.0,6,3600.0,3600.0,4,True
9,Nissan,White,31600,4,"$9,700.00",5.0,6,1896.0,1896.0,4,True


In [None]:
car_sales.head()

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price,Seats,Fuel per 100KM,Total Fuel Used,Total Fuel Used in L,Number of Wheels,Passed road Safety
0,Toyota,White,150043,4,"$4,000.00",5.0,6,9002.58,9002.58,4,True
1,Honda,Red,87899,4,"$5,000.00",5.0,6,5273.94,5273.94,4,True
2,Toyota,Blue,32549,3,"$7,000.00",5.0,6,1952.94,1952.94,4,True
3,BMW,Black,11179,5,"$22,000.00",5.0,6,670.74,670.74,4,True
4,Nissan,White,213095,4,"$3,500.00",5.0,6,12785.7,12785.7,4,True


In [None]:
car_sales.drop("Total Fuel Used", axis=1, inplace=True)

In [None]:
car_sales

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price,Seats,Fuel per 100KM,Total Fuel Used in L,Number of Wheels,Passed road Safety
0,Toyota,White,150043,4,"$4,000.00",5.0,6,9002.58,4,True
1,Honda,Red,87899,4,"$5,000.00",5.0,6,5273.94,4,True
2,Toyota,Blue,32549,3,"$7,000.00",5.0,6,1952.94,4,True
3,BMW,Black,11179,5,"$22,000.00",5.0,6,670.74,4,True
4,Nissan,White,213095,4,"$3,500.00",5.0,6,12785.7,4,True
5,Toyota,Green,99213,4,"$4,500.00",5.0,6,5952.78,4,True
6,Honda,Blue,45698,4,"$7,500.00",5.0,6,2741.88,4,True
7,Honda,Blue,54738,4,"$7,000.00",5.0,6,3284.28,4,True
8,Toyota,White,60000,4,"$6,250.00",5.0,6,3600.0,4,True
9,Nissan,White,31600,4,"$9,700.00",5.0,6,1896.0,4,True


In [25]:
car_sales.to_csv("TestBitch.csv")

In [110]:
car_sales_shuffled = car_sales.sample(frac=1)

In [88]:
car_sales_shuffled.sample(frac=0.1)

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
7,Honda,Blue,54738,4,"$7,000.00"


In [112]:
car_sales_shuffled.reset_index(drop=True, inplace=True)

In [113]:
car_sales_shuffled

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Blue,54738,4,"$7,000.00"
2,Toyota,Green,99213,4,"$4,500.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Honda,Blue,45698,4,"$7,500.00"
5,Nissan,White,213095,4,"$3,500.00"
6,Toyota,White,60000,4,"$6,250.00"
7,Honda,Red,87899,4,"$5,000.00"
8,Toyota,Blue,32549,3,"$7,000.00"
9,Nissan,White,31600,4,"$9,700.00"


In [114]:
car_sales_shuffled = car_sales_shuffled.sample(frac=1)

In [115]:
car_sales_shuffled

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
8,Toyota,Blue,32549,3,"$7,000.00"
7,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Green,99213,4,"$4,500.00"
5,Nissan,White,213095,4,"$3,500.00"
6,Toyota,White,60000,4,"$6,250.00"
3,BMW,Black,11179,5,"$22,000.00"
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Blue,54738,4,"$7,000.00"
4,Honda,Blue,45698,4,"$7,500.00"
9,Nissan,White,31600,4,"$9,700.00"


In [116]:
car_sales["Odometer (KM)"] = car_sales["Odometer (KM)"].apply(lambda x : x / 1.6)

In [120]:
car_sales.rename(columns={"Odometer (KM)": "Odometer (M)"}, inplace=True)

In [121]:
car_sales

Unnamed: 0,Make,Colour,Odometer (M),Doors,Price
0,Toyota,White,93776.875,4,"$4,000.00"
1,Honda,Red,54936.875,4,"$5,000.00"
2,Toyota,Blue,20343.125,3,"$7,000.00"
3,BMW,Black,6986.875,5,"$22,000.00"
4,Nissan,White,133184.375,4,"$3,500.00"
5,Toyota,Green,62008.125,4,"$4,500.00"
6,Honda,Blue,28561.25,4,"$7,500.00"
7,Honda,Blue,34211.25,4,"$7,000.00"
8,Toyota,White,37500.0,4,"$6,250.00"
9,Nissan,White,19750.0,4,"$9,700.00"
