In [1]:
import pandas as pd

# Create a simple sales data
data = {
    'Product': ['A', 'B', 'C', 'D', 'E'],
    'Price': [100, 250, 300, 150, 500],
    'Quantity': [3, 1, 4, 2, 1]
}

df = pd.DataFrame(data)
print(df)


  Product  Price  Quantity
0       A    100         3
1       B    250         1
2       C    300         4
3       D    150         2
4       E    500         1


In [2]:
print(df.shape)        # (rows, columns)
print(df.columns)      # List of column names
print(df.info())       # Structure of the DataFrame
print(df.describe())   # Summary statistics


(5, 3)
Index(['Product', 'Price', 'Quantity'], dtype='object')
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Product   5 non-null      object
 1   Price     5 non-null      int64 
 2   Quantity  5 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 252.0+ bytes
None
            Price  Quantity
count    5.000000   5.00000
mean   260.000000   2.20000
std    155.724115   1.30384
min    100.000000   1.00000
25%    150.000000   1.00000
50%    250.000000   2.00000
75%    300.000000   3.00000
max    500.000000   4.00000


In [6]:
df['Total Sale'] = df['Price'] * df['Quantity']
df['Discounted Price'] = df['Price'] * 0.9
df['Final Sale'] = df['Discounted Price'] * df['Quantity']
print(df)

  Product  Price  Quantity  Total Sale  Discounted Price  Final Sale
0       A    100         3         300              90.0       270.0
1       B    250         1         250             225.0       225.0
2       C    300         4        1200             270.0      1080.0
3       D    150         2         300             135.0       270.0
4       E    500         1         500             450.0       450.0


In [7]:
# Products with price > 200
print(df[df['Price'] > 200])

# Products with total sale > 300
print(df[df['Total Sale'] > 300])


  Product  Price  Quantity  Total Sale  Discounted Price  Final Sale
1       B    250         1         250             225.0       225.0
2       C    300         4        1200             270.0      1080.0
4       E    500         1         500             450.0       450.0
  Product  Price  Quantity  Total Sale  Discounted Price  Final Sale
2       C    300         4        1200             270.0      1080.0
4       E    500         1         500             450.0       450.0


In [9]:
df = df.sort_values(by='Total Sale', ascending=False)
df = df[['Product', 'Price', 'Quantity', 'Total Sale', 'Final Sale']]  # Reorder columns
print(df)


  Product  Price  Quantity  Total Sale  Final Sale
2       C    300         4        1200      1080.0
4       E    500         1         500       450.0
0       A    100         3         300       270.0
3       D    150         2         300       270.0
1       B    250         1         250       225.0


In [10]:
df.loc[2, 'Price'] = None  # Add missing value

# Detect missing
print(df.isnull())

# Fill missing with mean
df['Price'] = df['Price'].fillna(df['Price'].mean())


   Product  Price  Quantity  Total Sale  Final Sale
2    False   True     False       False       False
4    False  False     False       False       False
0    False  False     False       False       False
3    False  False     False       False       False
1    False  False     False       False       False


In [11]:
# Assume new column
df['Category'] = ['Electronics', 'Clothing', 'Electronics', 'Clothing', 'Clothing']

# Group by Category
print(df.groupby('Category')['Total Sale'].sum())


Category
Clothing       1050
Electronics    1500
Name: Total Sale, dtype: int64


In [13]:
df.to_csv('sales_data.csv', index=False)  # Save to CSV

df_loaded = pd.read_csv('sales_data.csv')  # Load again
print(df)


  Product  Price  Quantity  Total Sale  Final Sale     Category
2       C  250.0         4        1200      1080.0  Electronics
4       E  500.0         1         500       450.0     Clothing
0       A  100.0         3         300       270.0  Electronics
3       D  150.0         2         300       270.0     Clothing
1       B  250.0         1         250       225.0     Clothing
