 Introducing DataFrames
Create a DataFrame with Product, Quantity, and Price.

In [1]:
import pandas as pd

shop = pd.DataFrame(
    {
        'Product' : ['Table', 'Chair', 'Plug', 'Lamp'],
        'Quantity' : [20, 10, 30, 50],
        'Price' : [100000, 5000, 6000, 50000]
    }
)
print(shop)

  Product  Quantity   Price
0   Table        20  100000
1   Chair        10    5000
2    Plug        30    6000
3    Lamp        50   50000


 Inspecting a DataFrame
Load any DataFrame and use .info(), .describe(), .shape

In [2]:
print(shop.shape, "\n")
print(shop.describe(),"\n")
print(shop.info(),"\n")

(4, 3) 

        Quantity          Price
count   4.000000       4.000000
mean   27.500000   40250.000000
std    17.078251   45021.291259
min    10.000000    5000.000000
25%    17.500000    5750.000000
50%    25.000000   28000.000000
75%    35.000000   62500.000000
max    50.000000  100000.000000 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Product   4 non-null      object
 1   Quantity  4 non-null      int64 
 2   Price     4 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 228.0+ bytes
None 



Parts of a DataFrame
Use .index, .columns, and .values on your DataFrame

In [8]:
print(shop.index,"\n")
print(shop.columns,"\n")
print(shop.values,"\n")


RangeIndex(start=0, stop=4, step=1) 

Index(['Product', 'Quantity', 'Price'], dtype='object') 

[['Table' 20 100000]
 ['Chair' 10 5000]
 ['Plug' 30 6000]
 ['Lamp' 50 50000]] 



Sort by Price from high to low.

In [9]:
print(shop.sort_values('Price', ascending=False))

  Product  Quantity   Price
0   Table        20  100000
3    Lamp        50   50000
2    Plug        30    6000
1   Chair        10    5000


Subsetting Columns
Show only Product and Price.

In [10]:
product_price = shop[['Product', 'Price']]
print(product_price)

  Product   Price
0   Table  100000
1   Chair    5000
2    Plug    6000
3    Lamp   50000


Subsetting Rows
Filter rows where Quantity > 10.

In [11]:
quantity_morethan_10 = shop[shop['Quantity'] > 10]
print(quantity_morethan_10)

  Product  Quantity   Price
0   Table        20  100000
2    Plug        30    6000
3    Lamp        50   50000


Subsetting by Categorical Variable
Filter rows where product is 'chair'

In [15]:
chair_only = shop[shop['Product'] == '']
print(chair_only)

Empty DataFrame
Columns: [Product, Quantity, Price]
Index: []


New Columns
Create a column Total = Price * Quantity.

In [17]:
new_column = shop['Total'] = shop['Price'] * shop['Quantity']
print(new_column)
print(shop)

0    2000000
1      50000
2     180000
3    2500000
dtype: int64
  Product  Quantity   Price    Total
0   Table        20  100000  2000000
1   Chair        10    5000    50000
2    Plug        30    6000   180000
3    Lamp        50   50000  2500000


Adding New Columns
Add a column Store = 'Kathmandu'.

In [24]:
shop['Store'] = ['Kathmandu', 'Pokhara', 'Lalitpur', 'Bhaktapur']

print(shop)

  Product  Quantity   Price    Total      Store
0   Table        20  100000  2000000  Kathmandu
1   Chair        10    5000    50000    Pokhara
2    Plug        30    6000   180000   Lalitpur
3    Lamp        50   50000  2500000  Bhaktapur


Combo-Attack
Filter Price > 100, show Product and Price, sort by Price.

In [21]:
filter_price = shop[shop['Price'] > 100][['Product', 'Price']].sort_values('Price')
print(filter_price)


  Product   Price
1   Chair    5000
2    Plug    6000
3    Lamp   50000
0   Table  100000


Mini Project: Walmart Sales

we are analyzing a Walmart sales dataset.

In [5]:
#Load the dataset using pd.read_csv().
# Inspect using .head(), .info(), .describe()

walmart = pd.read_csv("Walmart_Sales.csv")

walmart.columns = walmart.columns.str.strip().str.replace('"', ' ').str.replace(' ', ' ')
print(walmart.head())


   Store        Date  Weekly_Sales  Holiday_Flag  Temperature  Fuel_Price  \
0      1  05-02-2010    1643690.90             0        42.31       2.572   
1      1  12-02-2010    1641957.44             1        38.51       2.548   
2      1  19-02-2010    1611968.17             0        39.93       2.514   
3      1  26-02-2010    1409727.59             0        46.63       2.561   
4      1  05-03-2010    1554806.68             0        46.50       2.625   

          CPI  Unemployment  
0  211.096358         8.106  
1  211.242170         8.106  
2  211.289143         8.106  
3  211.319643         8.106  
4  211.350143         8.106  


In [None]:
# Subset:
# Rows where sales > 100

sales_more_than100 = walmart[walmart['Weekly_Sales'] > 100]
print(sales_more_than100.head(), "\n")

# Columns: Store, Weekly_Sales
store_weekly_sales = walmart[['Store', 'Weekly_Sales']]
print(store_weekly_sales.head(), "\n")

# Sort by Weekly_Sales (descending).
sorting_weekly_sales = walmart.sort_values('Weekly_Sales', ascending=False)
print(sorting_weekly_sales.head(), "\n")


      Store        Date  Weekly_Sales  Holiday_Flag  Temperature  Fuel_Price  \
1905     14  24-12-2010    3818686.45             0        30.59       3.141   
2763     20  24-12-2010    3766687.43             0        25.17       3.141   
1333     10  24-12-2010    3749057.69             0        57.06       3.236   
527       4  23-12-2011    3676388.98             0        35.92       3.103   
1762     13  24-12-2010    3595903.20             0        34.90       2.846   

             CPI  Unemployment  
1905  182.544590         8.724  
2763  204.637673         7.484  
1333  126.983581         9.003  
527   129.984548         5.143  
1762  126.983581         7.795   



In [4]:
# Add column:
# Discounted_Sales = Weekly_Sales * 0.9
walmart['Discounted_Sales'] = walmart['Weekly_Sales'] * 0.9
# print(walmart.head())

# Filter for a single Store and show top 5 discounted sales.
store_1 = walmart[walmart['Store'] == 1]

sort_store_1 = store_1.sort_values(by='Discounted_Sales', ascending=False)
top5_store1 = sort_store_1.head()
print(top5_store1)


     Store        Date  Weekly_Sales  Holiday_Flag  Temperature  Fuel_Price  \
46       1  24-12-2010    2387950.20             0        52.33       2.886   
98       1  23-12-2011    2270188.99             0        47.96       3.112   
94       1  25-11-2011    2033320.66             1        60.14       3.236   
42       1  26-11-2010    1955624.11             1        64.52       2.735   
113      1  06-04-2012    1899676.88             0        70.43       3.891   

            CPI  Unemployment  Discounted_Sales  
46   211.405122         7.838       2149155.180  
98   219.357722         7.866       2043170.091  
94   218.467621         7.866       1829988.594  
42   211.748433         7.838       1760061.699  
113  221.435611         7.143       1709709.192  


In [None]:
# Try to combine steps 3 to 6 into one line using method chaining (combo-attack).

result = (
    walmart[walmart['Weekly_Sales'] > 100]
    [['Store', 'Weekly_Sales']].sort_values(by='Weekly_Sales', ascending=False)
    .assign(Discounted_Sales = lambda x: x['Weekly_Sales'] * 0.9)
)

print(result.head())


      Store  Weekly_Sales  Discounted_Sales
1905     14    3818686.45       3436817.805
2763     20    3766687.43       3390018.687
1333     10    3749057.69       3374151.921
527       4    3676388.98       3308750.082
1762     13    3595903.20       3236312.880
