In [1]:
import pandas as pd
import numpy as np


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Load the product catalogue. read_csv already returns a DataFrame, so no
# pd.DataFrame(...) wrapper is needed.
data = pd.read_csv("products.csv")
# Work on an independent copy so `data` keeps the values exactly as read from
# disk even if `df` is later modified in place.
df = data.copy()
df

Unnamed: 0,ProductID,ProductName,Category,Price
0,201,Laptop,Electronics,1200.0
1,202,Smartphone,Electronics,800.0
2,203,Tablet,Electronics,400.0
3,204,Monitor,Accessories,200.0
4,205,Keyboard,Accessories,


In [3]:
# Fill the missing Price using linear interpolation (the pandas default,
# spelled out here). The only gap is in the last row, so there is no later
# value to interpolate towards and the previous row's price (200.0) is
# carried forward.
df["Price"] = df["Price"].interpolate(method="linear")
df

Unnamed: 0,ProductID,ProductName,Category,Price
0,201,Laptop,Electronics,1200.0
1,202,Smartphone,Electronics,800.0
2,203,Tablet,Electronics,400.0
3,204,Monitor,Accessories,200.0
4,205,Keyboard,Accessories,200.0


In [4]:
# Preview the first five rows (the whole frame here, since it has 5 rows).
df.head(5)

Unnamed: 0,ProductID,ProductName,Category,Price
0,201,Laptop,Electronics,1200.0
1,202,Smartphone,Electronics,800.0
2,203,Tablet,Electronics,400.0
3,204,Monitor,Accessories,200.0
4,205,Keyboard,Accessories,200.0


In [5]:
# Column dtypes and non-null counts; Price reports 5 non-null after the
# interpolation step above.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   ProductID    5 non-null      int64  
 1   ProductName  5 non-null      object 
 2   Category     5 non-null      object 
 3   Price        5 non-null      float64
dtypes: float64(1), int64(1), object(2)
memory usage: 192.0+ bytes


In [6]:
# Summary statistics for the numeric columns. ProductID looks like an
# identifier, so its statistics are not meaningful. The `std` row is the
# sample standard deviation (ddof=1).
df.describe()

Unnamed: 0,ProductID,Price
count,5.0,5.0
mean,203.0,560.0
std,1.581139,433.589668
min,201.0,200.0
25%,202.0,200.0
50%,203.0,400.0
75%,204.0,800.0
max,205.0,1200.0


In [7]:
# Count each distinct combination of all column values (i.e. each distinct
# row). Every count is 1, so there are no fully duplicated rows.
df.value_counts()

ProductID  ProductName  Category     Price 
201        Laptop       Electronics  1200.0    1
202        Smartphone   Electronics  800.0     1
203        Tablet       Electronics  400.0     1
204        Monitor      Accessories  200.0     1
205        Keyboard     Accessories  200.0     1
Name: count, dtype: int64

In [8]:
# Number of products in each category, most frequent first.
df["Category"].value_counts()

Category
Electronics    3
Accessories    2
Name: count, dtype: int64

In [9]:
# Keep every row but only the identifier and price columns.
product_df = df.loc[:, ["ProductID", "Price"]]

In [10]:
# Display the ProductID / Price subset.
product_df

Unnamed: 0,ProductID,Price
0,201,1200.0
1,202,800.0
2,203,400.0
3,204,200.0
4,205,200.0


In [11]:
# Convert to a 2-D NumPy array. An ndarray has a single dtype, so the int64
# ProductID column is upcast to float64 alongside Price.
arr = product_df.to_numpy()

In [12]:
# Inspect the array; ProductID values now appear as floats (201., 202., ...).
arr

array([[ 201., 1200.],
       [ 202.,  800.],
       [ 203.,  400.],
       [ 204.,  200.],
       [ 205.,  200.]])

In [13]:
# Distinct category labels; np.unique returns them sorted.
np.unique(df['Category'])

array(['Accessories', 'Electronics'], dtype=object)

In [14]:
# Mean price computed through NumPy; agrees with the `mean` row of
# df.describe() (560.0).
np.mean(df['Price'])

560.0

In [15]:
# np.std defaults to ddof=0 (population standard deviation), so this value is
# smaller than the sample std (ddof=1) that df.describe() reports for Price.
# NOTE(review): pass ddof=1 here if the two figures are meant to agree.
np.std(df['Price'])

387.8143885933063

In [16]:
# Lowest and highest price, displayed as a (min, max) tuple.
np.min(df['Price']), np.max(df['Price'])

(200.0, 1200.0)

In [17]:
# Rows whose price is strictly greater than 500.
df.loc[df['Price'].gt(500)]

Unnamed: 0,ProductID,ProductName,Category,Price
0,201,Laptop,Electronics,1200.0
1,202,Smartphone,Electronics,800.0


In [18]:
# Average price per category.
df.groupby('Category')['Price'].mean()

Category
Accessories    200.0
Electronics    800.0
Name: Price, dtype: float64

In [19]:
# Products ordered from most to least expensive (returns a new frame; df
# itself is not reordered).
df.sort_values(by='Price', ascending=False)


Unnamed: 0,ProductID,ProductName,Category,Price
0,201,Laptop,Electronics,1200.0
1,202,Smartphone,Electronics,800.0
2,203,Tablet,Electronics,400.0
3,204,Monitor,Accessories,200.0
4,205,Keyboard,Accessories,200.0


In [20]:
# Row positions ordered from highest to lowest price. np.argsort on a Series
# returns a Series, so the [::-1] reversal also reverses the index labels:
# read the right-hand values as the positions and ignore the left-hand labels.
# Reversing an ascending sort also flips the order of tied prices.
# NOTE(review): for a plain position array with ties kept in original order,
# consider np.argsort(-df['Price'].to_numpy(), kind='stable').
np.argsort(df['Price'])[::-1]


4    0
3    1
2    2
1    4
0    3
Name: Price, dtype: int32

In [21]:
# Several aggregates of Price in one call; the result is a Series indexed by
# the aggregate name.
df['Price'].agg(['sum', 'mean', 'min', 'max'])


sum     2800.0
mean     560.0
min      200.0
max     1200.0
Name: Price, dtype: float64

In [22]:
# One row per category holding the mean price of its products.
df.pivot_table(
    index='Category',
    values='Price',
    aggfunc='mean',
)


Unnamed: 0_level_0,Price
Category,Unnamed: 1_level_1
Accessories,200.0
Electronics,800.0


In [23]:
# Running total of Price in row order.
df['Price'].cumsum()


0    1200.0
1    2000.0
2    2400.0
3    2600.0
4    2800.0
Name: Price, dtype: float64

In [24]:
# Boolean mask marking missing prices; all False once the gap has been filled.
np.isnan(df['Price'].to_numpy())


array([False, False, False, False, False])

In [25]:
# Multiplier applied to the list price (0.9 keeps 90 %, i.e. a 10 % discount).
# Named so the rate can be changed in one place instead of being a bare
# literal inside the expression.
DISCOUNT_FACTOR = 0.9

# Adds a new column to df; Price itself is left unchanged.
df['Discounted_Price'] = df['Price'] * DISCOUNT_FACTOR
df

Unnamed: 0,ProductID,ProductName,Category,Price,Discounted_Price
0,201,Laptop,Electronics,1200.0,1080.0
1,202,Smartphone,Electronics,800.0,720.0
2,203,Tablet,Electronics,400.0,360.0
3,204,Monitor,Accessories,200.0,180.0
4,205,Keyboard,Accessories,200.0,180.0


In [26]:

# Stock levels, available for products 201 and 202 only.
df2 = pd.DataFrame({'ProductID': [201, 202], 'Stock': [50, 30]})
# how='inner' is the pandas default; it is spelled out so it is obvious that
# products without a stock record (203-205) are dropped from the result.
# validate='one_to_one' makes pandas raise MergeError if ProductID is ever
# duplicated on either side, instead of silently multiplying rows.
merged_df = pd.merge(
    df,
    df2,
    on='ProductID',
    how='inner',
    validate='one_to_one',
)
merged_df


Unnamed: 0,ProductID,ProductName,Category,Price,Discounted_Price,Stock
0,201,Laptop,Electronics,1200.0,1080.0,50
1,202,Smartphone,Electronics,800.0,720.0,30


In [27]:
# Join the Price and Stock values into one flat array. Stock is integer, so
# it is upcast to float to share a dtype with the prices.
np.concatenate((df['Price'].to_numpy(), df2['Stock'].to_numpy()))


array([1200.,  800.,  400.,  200.,  200.,   50.,   30.])