In [85]:
import pandas as pd

In [87]:
data = {
    'CustomerID': [1, 2, 3, 4, 5],
    'Name': ['John', 'Jane', None, 'Alice', 'Bob'],
    'Purchase': [200, 300, 150, None, 500]
}

In [89]:
customer_data = pd.DataFrame(data)

In [91]:
print(customer_data)

   CustomerID   Name  Purchase
0           1   John     200.0
1           2   Jane     300.0
2           3   None     150.0
3           4  Alice       NaN
4           5    Bob     500.0


In [51]:
missing_value = customer_data.isnull().sum()
missing_value

CustomerID    0
Name          1
Purchase      1
dtype: int64

In [93]:
customer_data['Name'].fillna('unknown',inplace=True)
customer_data

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  customer_data['Name'].fillna('unknown',inplace=True)


Unnamed: 0,CustomerID,Name,Purchase
0,1,John,200.0
1,2,Jane,300.0
2,3,unknown,150.0
3,4,Alice,
4,5,Bob,500.0


In [95]:
median_value = customer_data['Purchase'].mean()
median_value

287.5

In [97]:
customer_data['Purchase']=customer_data['Purchase'].fillna(median_value)

In [75]:
print(median_value)

nan


In [99]:
customer_data['Purchase']

0    200.0
1    300.0
2    150.0
3    287.5
4    500.0
Name: Purchase, dtype: float64

In [101]:
customer_data.dropna(inplace=True)
print(customer_data)

   CustomerID     Name  Purchase
0           1     John     200.0
1           2     Jane     300.0
2           3  unknown     150.0
3           4    Alice     287.5
4           5      Bob     500.0


In [103]:
filtered_customers = customer_data[customer_data['Purchase'] > median_value]
print(filtered_customers)

   CustomerID  Name  Purchase
1           2  Jane     300.0
4           5   Bob     500.0


In [105]:
# 4 Series Operations: Temperature Data

In [107]:
temperature = pd.Series([23, 21, 20, 25, 27, 30, 28, 22, 24, 26])
mean_temperature = temperature.mean()
median_temperature = temperature.median()
variance_temperature = temperature.var()
mean_temperature, median_temperature, variance_temperature


(24.6, 24.5, 10.266666666666666)

In [109]:
above_below_mean = temperature.apply(lambda x: 'above' if x > mean_temperature else 'below')
above_below_mean

0    below
1    below
2    below
3    above
4    above
5    above
6    above
7    below
8    below
9    above
dtype: object

In [111]:
temperature_kelvin = temperature + 273.15
temperature_kelvin

0    296.15
1    294.15
2    293.15
3    298.15
4    300.15
5    303.15
6    301.15
7    295.15
8    297.15
9    299.15
dtype: float64

In [113]:
rolling_mean = temperature.rolling(window=3).mean()
rolling_mean

0          NaN
1          NaN
2    21.333333
3    22.000000
4    24.000000
5    27.333333
6    28.333333
7    26.666667
8    24.666667
9    24.000000
dtype: float64

In [115]:
std_dev_temperature = temperature.std()
filtered_temperatures = temperature[(temperature < (mean_temperature - std_dev_temperature)) | 
                                    (temperature > (mean_temperature + std_dev_temperature))]
filtered_temperatures

1    21
2    20
5    30
6    28
dtype: int64

In [117]:
# merging dataframes: product orders

In [119]:
orders = pd.DataFrame({
    'OrderID': [1, 2, 3, 4, 5],
    'CustomerID': [101, 102, 103, 104, 101],
    'Product': ['A', 'B', 'A', 'C', 'B'],
    'Quantity': [2, 1, 4, 2, 3]
})

customers = pd.DataFrame({
    'CustomerID': [101, 102, 103, 104],
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Location': ['New York', 'Los Angeles', 'Chicago', 'Houston']
})

merged_df = pd.merge(orders, customers, on='CustomerID')
print(merged_df)

   OrderID  CustomerID Product  Quantity     Name     Location
0        1         101       A         2    Alice     New York
1        2         102       B         1      Bob  Los Angeles
2        3         103       A         4  Charlie      Chicago
3        4         104       C         2    David      Houston
4        5         101       B         3    Alice     New York


In [121]:
location_totals = merged_df.groupby('Location')['Quantity'].sum()
print(location_totals)

Location
Chicago        4
Houston        2
Los Angeles    1
New York       5
Name: Quantity, dtype: int64


In [123]:
maxing_value = merged_df['Quantity'].max()
print(maxing_value)

4
