In [1]:
import numpy as np
import pandas as pd

Here are the **basic Pandas queries** (methods and functions) that you can use:

---

### **1. Importing and Creating DataFrames**
- `import pandas as pd`
- `pd.DataFrame()`
- `pd.Series()`
- `pd.read_csv()`
- `pd.read_excel()`
- `pd.read_sql()`

---

### **2. DataFrame Properties**
- `df.shape`
- `df.info()`
- `df.describe()`
- `df.dtypes`
- `df.columns`
- `df.index`
- `df.values`

---

### **3. Indexing and Selecting Data**
- `df['column_name']`
- `df.iloc[]` (by index)
- `df.loc[]` (by label)
- `df.at[]` (single value)
- `df.iat[]` (single value by index)
- `df[['col1', 'col2']]` (multiple columns)

---

### **4. DataFrame Operations**
- `df + df` (Addition)
- `df - df` (Subtraction)
- `df * df` (Multiplication)
- `df / df` (Division)

---

### **5. Aggregation Functions**
- `df.sum()`
- `df.mean()`
- `df.min()`
- `df.max()`
- `df.count()`
- `df.std()`
- `df.corr()`
- `df.cov()`
- `df.groupby()`
  
---

### **6. Handling Missing Data**
- `df.isnull()`
- `df.notnull()`
- `df.dropna()`
- `df.fillna()`
- `df.replace()`

---

### **7. Sorting and Filtering**
- `df.sort_values()`
- `df.sort_index()`
- `df.query()`
- `df[df['col'] > value]`

---

### **8. Adding/Modifying Columns**
- `df['new_col'] = value`
- `df.assign()`
- `df.insert()`

---

### **9. Merging and Joining**
- `pd.merge()`
- `df.join()`
- `pd.concat()`

---

### **10. Pivoting and Reshaping**
- `df.pivot()`
- `df.pivot_table()`
- `df.melt()`
- `df.stack()`
- `df.unstack()`

---

### **11. DataFrame Sampling**
- `df.sample()`
- `df.head()`
- `df.tail()`

---

### **12. Writing Data**
- `df.to_csv()`
- `df.to_excel()`
- `df.to_sql()`
- `df.to_json()`

---

These are the **core Pandas queries** you will frequently use when working with data. Let me know if you need more details on any specific query!

In [2]:
dic1 = {
    "name" : ['Hamza', 'Ali', 'Hassan'],
    "marks": [12,23,34],
    "city":  ['ISB', 'RWp', 'Sia']
}


In [3]:
df = pd.DataFrame(dic1)

In [4]:
df

Unnamed: 0,name,marks,city
0,Hamza,12,ISB
1,Ali,23,RWp
2,Hassan,34,Sia


In [5]:
df.to_csv("friends.csv")

In [6]:
df.to_csv("friends.csv_index.csv" , index=False )

In [7]:
df.head(2)

Unnamed: 0,name,marks,city
0,Hamza,12,ISB
1,Ali,23,RWp


In [8]:
df.tail(2)

Unnamed: 0,name,marks,city
1,Ali,23,RWp
2,Hassan,34,Sia


In [9]:
df.describe()

Unnamed: 0,marks
count,3.0
mean,23.0
std,11.0
min,12.0
25%,17.5
50%,23.0
75%,28.5
max,34.0


In [10]:
train = pd.read_csv('train.csv')

In [11]:
train


Unnamed: 0.1,Unnamed: 0,train,city,Fair
0,0,Expressway,rwp,200
1,1,JAFAR,sialkot,400
2,2,ISLAMABAD,Gujranwala,100


In [12]:
train['train']

0    Expressway
1        JAFAR 
2     ISLAMABAD
Name: train, dtype: object

In [13]:
train['city'][0]

'rwp'

In [14]:
train.index=['first' ,'second', 'third']

In [15]:
train

Unnamed: 0.1,Unnamed: 0,train,city,Fair
first,0,Expressway,rwp,200
second,1,JAFAR,sialkot,400
third,2,ISLAMABAD,Gujranwala,100


In [16]:
#Series

s = pd.Series([10, 20, 30, 40])

print(s)

0    10
1    20
2    30
3    40
dtype: int64


In [17]:
#DAtaframe
data = {
    'Name': ['Ali', 'Sara', 'John'],
    'Age': [25, 30, 22]
}

df = pd.DataFrame(data)

print(df)

   Name  Age
0   Ali   25
1  Sara   30
2  John   22


In [18]:
newdf = pd.DataFrame(np.random.rand(334,5), index=np.arange(334))
print(newdf)

            0         1         2         3         4
0    0.828495  0.156326  0.286148  0.417449  0.322434
1    0.377221  0.436404  0.756613  0.677647  0.414334
2    0.092707  0.409874  0.487111  0.512545  0.290176
3    0.531071  0.404889  0.202932  0.346390  0.001561
4    0.264164  0.148936  0.725449  0.938376  0.145358
..        ...       ...       ...       ...       ...
329  0.498992  0.389519  0.999639  0.487691  0.286030
330  0.825715  0.730087  0.385473  0.601623  0.633718
331  0.058963  0.671243  0.885840  0.344577  0.745051
332  0.484230  0.137255  0.666687  0.289031  0.328351
333  0.792250  0.036950  0.877762  0.759674  0.731948

[334 rows x 5 columns]


In [19]:
newdf.head()

Unnamed: 0,0,1,2,3,4
0,0.828495,0.156326,0.286148,0.417449,0.322434
1,0.377221,0.436404,0.756613,0.677647,0.414334
2,0.092707,0.409874,0.487111,0.512545,0.290176
3,0.531071,0.404889,0.202932,0.34639,0.001561
4,0.264164,0.148936,0.725449,0.938376,0.145358


In [20]:
type(newdf)

pandas.core.frame.DataFrame

In [21]:
newdf.describe()

Unnamed: 0,0,1,2,3,4
count,334.0,334.0,334.0,334.0,334.0
mean,0.503122,0.497865,0.519267,0.507871,0.504595
std,0.289275,0.286512,0.284478,0.287361,0.291366
min,0.004523,0.001407,0.003059,0.005217,0.001561
25%,0.260386,0.239219,0.285385,0.262939,0.260297
50%,0.490606,0.485341,0.527137,0.504543,0.499817
75%,0.755066,0.751002,0.770552,0.765426,0.745681
max,0.998814,0.999483,0.999639,0.998884,0.999858


In [22]:
newdf.dtypes

0    float64
1    float64
2    float64
3    float64
4    float64
dtype: object

In [23]:
newdf.index

Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
       ...
       324, 325, 326, 327, 328, 329, 330, 331, 332, 333],
      dtype='int32', length=334)

In [24]:
newdf.columns

RangeIndex(start=0, stop=5, step=1)

In [25]:
newdf.to_numpy()
#Dataframe to array

array([[0.82849525, 0.15632568, 0.28614762, 0.41744869, 0.32243388],
       [0.37722078, 0.43640414, 0.75661346, 0.67764659, 0.41433442],
       [0.09270681, 0.40987361, 0.48711125, 0.51254516, 0.29017586],
       ...,
       [0.05896274, 0.6712433 , 0.8858402 , 0.34457722, 0.74505121],
       [0.48422976, 0.13725513, 0.66668697, 0.28903072, 0.32835105],
       [0.79224971, 0.03695022, 0.87776207, 0.7596736 , 0.73194764]])

In [26]:
newdf.T
#Transpose

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,324,325,326,327,328,329,330,331,332,333
0,0.828495,0.377221,0.092707,0.531071,0.264164,0.426679,0.355314,0.977237,0.975167,0.586813,...,0.778955,0.523374,0.31259,0.984803,0.484207,0.498992,0.825715,0.058963,0.48423,0.79225
1,0.156326,0.436404,0.409874,0.404889,0.148936,0.477904,0.895554,0.093988,0.218977,0.456646,...,0.140455,0.737554,0.070906,0.07857,0.049271,0.389519,0.730087,0.671243,0.137255,0.03695
2,0.286148,0.756613,0.487111,0.202932,0.725449,0.80435,0.930693,0.245259,0.142716,0.108903,...,0.851884,0.8019,0.393073,0.197454,0.256589,0.999639,0.385473,0.88584,0.666687,0.877762
3,0.417449,0.677647,0.512545,0.34639,0.938376,0.350705,0.460825,0.746562,0.021179,0.905499,...,0.690743,0.853052,0.431239,0.929181,0.075172,0.487691,0.601623,0.344577,0.289031,0.759674
4,0.322434,0.414334,0.290176,0.001561,0.145358,0.29472,0.380439,0.793263,0.253225,0.875201,...,0.388686,0.334884,0.817995,0.163511,0.219356,0.28603,0.633718,0.745051,0.328351,0.731948


In [27]:
newdf.head()

Unnamed: 0,0,1,2,3,4
0,0.828495,0.156326,0.286148,0.417449,0.322434
1,0.377221,0.436404,0.756613,0.677647,0.414334
2,0.092707,0.409874,0.487111,0.512545,0.290176
3,0.531071,0.404889,0.202932,0.34639,0.001561
4,0.264164,0.148936,0.725449,0.938376,0.145358


In [28]:
#change 
newdf.loc[0,0] = 0.812

In [29]:
newdf.head(2)

Unnamed: 0,0,1,2,3,4
0,0.812,0.156326,0.286148,0.417449,0.322434
1,0.377221,0.436404,0.756613,0.677647,0.414334


In [30]:
newdf.columns = list("ABCDE")

In [31]:
newdf.head(2)

Unnamed: 0,A,B,C,D,E
0,0.812,0.156326,0.286148,0.417449,0.322434
1,0.377221,0.436404,0.756613,0.677647,0.414334


In [32]:
newdf.loc[(newdf['A']<0.3)  & (newdf['C'] > 0.1)]

Unnamed: 0,A,B,C,D,E
2,0.092707,0.409874,0.487111,0.512545,0.290176
4,0.264164,0.148936,0.725449,0.938376,0.145358
10,0.084930,0.937213,0.346999,0.734857,0.943702
12,0.172134,0.860580,0.402461,0.241368,0.353301
19,0.270359,0.075859,0.828341,0.362633,0.338294
...,...,...,...,...,...
303,0.298299,0.774692,0.785074,0.413316,0.928750
310,0.131391,0.660850,0.869767,0.301326,0.365359
317,0.069870,0.159070,0.236198,0.457519,0.541416
321,0.167232,0.558156,0.436371,0.138194,0.409974


In [33]:
# Big Dictionary
data = {
    'Name': ['Ali', 'Sara', 'John', 'Maya', 'Ahmed', 'Zara', 'Tom', 'Jerry'],
    'Age': [25, 30, 22, 28, 24, 27, 26, 23],
    'City': ['Lahore', 'Karachi', 'Islamabad', 'Rawalpindi', 'Peshawar', 'Multan', 'Quetta', 'Sialkot'],
    'Gender': ['Male', 'Female', 'Male', 'Female', 'Male', 'Female', 'Male', 'Male'],
    'Marks': [85, 90, 75, 88, 92, 80, 70, 78]
}

# Create DataFrame
df = pd.DataFrame(data)

# Display
print(df)


    Name  Age        City  Gender  Marks
0    Ali   25      Lahore    Male     85
1   Sara   30     Karachi  Female     90
2   John   22   Islamabad    Male     75
3   Maya   28  Rawalpindi  Female     88
4  Ahmed   24    Peshawar    Male     92
5   Zara   27      Multan  Female     80
6    Tom   26      Quetta    Male     70
7  Jerry   23     Sialkot    Male     78


In [34]:
df.head()

Unnamed: 0,Name,Age,City,Gender,Marks
0,Ali,25,Lahore,Male,85
1,Sara,30,Karachi,Female,90
2,John,22,Islamabad,Male,75
3,Maya,28,Rawalpindi,Female,88
4,Ahmed,24,Peshawar,Male,92


In [35]:
df.tail(2)

Unnamed: 0,Name,Age,City,Gender,Marks
6,Tom,26,Quetta,Male,70
7,Jerry,23,Sialkot,Male,78


In [36]:
df.shape              # (Rows, Columns)



(8, 5)

In [37]:
df.size               # Total elements (rows × columns)

40

In [38]:
df.columns            # List of all column names

Index(['Name', 'Age', 'City', 'Gender', 'Marks'], dtype='object')

In [39]:
df.index              # Show the row indexes

RangeIndex(start=0, stop=8, step=1)

In [40]:
df.info()             # Full summary (memory, datatypes, etc.)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    8 non-null      object
 1   Age     8 non-null      int64 
 2   City    8 non-null      object
 3   Gender  8 non-null      object
 4   Marks   8 non-null      int64 
dtypes: int64(2), object(3)
memory usage: 452.0+ bytes


In [41]:
df.dtypes             # Data types of each column

Name      object
Age        int64
City      object
Gender    object
Marks      int64
dtype: object

In [42]:
df.describe()         # Statistics for numeric columns

Unnamed: 0,Age,Marks
count,8.0,8.0
mean,25.625,82.25
std,2.66927,7.759786
min,22.0,70.0
25%,23.75,77.25
50%,25.5,82.5
75%,27.25,88.5
max,30.0,92.0


# Selection (Accessing Data)

In [44]:
df['Name']


0      Ali
1     Sara
2     John
3     Maya
4    Ahmed
5     Zara
6      Tom
7    Jerry
Name: Name, dtype: object

In [45]:
df[['Name', 'City', 'Marks']]


Unnamed: 0,Name,City,Marks
0,Ali,Lahore,85
1,Sara,Karachi,90
2,John,Islamabad,75
3,Maya,Rawalpindi,88
4,Ahmed,Peshawar,92
5,Zara,Multan,80
6,Tom,Quetta,70
7,Jerry,Sialkot,78


In [46]:
df.loc[3]             # Row with index label 3


Name            Maya
Age               28
City      Rawalpindi
Gender        Female
Marks             88
Name: 3, dtype: object

In [47]:
df.head(7)

Unnamed: 0,Name,Age,City,Gender,Marks
0,Ali,25,Lahore,Male,85
1,Sara,30,Karachi,Female,90
2,John,22,Islamabad,Male,75
3,Maya,28,Rawalpindi,Female,88
4,Ahmed,24,Peshawar,Male,92
5,Zara,27,Multan,Female,80
6,Tom,26,Quetta,Male,70


In [48]:
df.iloc[5]            # 6th row (position 5)


Name        Zara
Age           27
City      Multan
Gender    Female
Marks         80
Name: 5, dtype: object

In [49]:
# loc[ row_label , column_name ]

df.loc[2, 'City']     # Value in row 2, column 'City'



'Islamabad'

In [50]:
# iloc[ row_number , column_number ]

df.iloc[4, 2]         # Value at row 4, column 2

'Peshawar'

#Filtering Data


In [52]:
df.head(7)

Unnamed: 0,Name,Age,City,Gender,Marks
0,Ali,25,Lahore,Male,85
1,Sara,30,Karachi,Female,90
2,John,22,Islamabad,Male,75
3,Maya,28,Rawalpindi,Female,88
4,Ahmed,24,Peshawar,Male,92
5,Zara,27,Multan,Female,80
6,Tom,26,Quetta,Male,70


In [53]:
df[df['Age'] > 25]            # Rows where Age > 25

Unnamed: 0,Name,Age,City,Gender,Marks
1,Sara,30,Karachi,Female,90
3,Maya,28,Rawalpindi,Female,88
5,Zara,27,Multan,Female,80
6,Tom,26,Quetta,Male,70


In [54]:
df[df['Gender'] == 'Female']  # Only Female entries

Unnamed: 0,Name,Age,City,Gender,Marks
1,Sara,30,Karachi,Female,90
3,Maya,28,Rawalpindi,Female,88
5,Zara,27,Multan,Female,80


In [55]:
df[(df['Age'] > 25) & (df['Marks'] > 80)]   # Both conditions

Unnamed: 0,Name,Age,City,Gender,Marks
1,Sara,30,Karachi,Female,90
3,Maya,28,Rawalpindi,Female,88


In [56]:
df[(df['City'] == 'Lahore') | (df['City'] == 'Karachi')]  # Either condition

Unnamed: 0,Name,Age,City,Gender,Marks
0,Ali,25,Lahore,Male,85
1,Sara,30,Karachi,Female,90


📤 Sorting

In [58]:
df.sort_values('Age')

Unnamed: 0,Name,Age,City,Gender,Marks
2,John,22,Islamabad,Male,75
7,Jerry,23,Sialkot,Male,78
4,Ahmed,24,Peshawar,Male,92
0,Ali,25,Lahore,Male,85
6,Tom,26,Quetta,Male,70
5,Zara,27,Multan,Female,80
3,Maya,28,Rawalpindi,Female,88
1,Sara,30,Karachi,Female,90


In [59]:
df.sort_values('Marks', ascending=True)

Unnamed: 0,Name,Age,City,Gender,Marks
6,Tom,26,Quetta,Male,70
2,John,22,Islamabad,Male,75
7,Jerry,23,Sialkot,Male,78
5,Zara,27,Multan,Female,80
0,Ali,25,Lahore,Male,85
3,Maya,28,Rawalpindi,Female,88
1,Sara,30,Karachi,Female,90
4,Ahmed,24,Peshawar,Male,92


In [60]:
df.sort_values('Marks', ascending=False)

Unnamed: 0,Name,Age,City,Gender,Marks
4,Ahmed,24,Peshawar,Male,92
1,Sara,30,Karachi,Female,90
3,Maya,28,Rawalpindi,Female,88
0,Ali,25,Lahore,Male,85
5,Zara,27,Multan,Female,80
7,Jerry,23,Sialkot,Male,78
2,John,22,Islamabad,Male,75
6,Tom,26,Quetta,Male,70


 🆕 Adding and Modifying Data

In [62]:
df['Country'] = 'Pakistan'

In [63]:
df['Marks'] = df['Marks'] + 5     # Increase all marks by 5

In [64]:
df.head(7)

Unnamed: 0,Name,Age,City,Gender,Marks,Country
0,Ali,25,Lahore,Male,90,Pakistan
1,Sara,30,Karachi,Female,95,Pakistan
2,John,22,Islamabad,Male,80,Pakistan
3,Maya,28,Rawalpindi,Female,93,Pakistan
4,Ahmed,24,Peshawar,Male,97,Pakistan
5,Zara,27,Multan,Female,85,Pakistan
6,Tom,26,Quetta,Male,75,Pakistan


🗑️ Deleting Data

In [66]:
df.drop('City', axis=1)          # Remove 'City' column (temp)

Unnamed: 0,Name,Age,Gender,Marks,Country
0,Ali,25,Male,90,Pakistan
1,Sara,30,Female,95,Pakistan
2,John,22,Male,80,Pakistan
3,Maya,28,Female,93,Pakistan
4,Ahmed,24,Male,97,Pakistan
5,Zara,27,Female,85,Pakistan
6,Tom,26,Male,75,Pakistan
7,Jerry,23,Male,83,Pakistan


In [67]:
df.drop(2)  #drop a row

Unnamed: 0,Name,Age,City,Gender,Marks,Country
0,Ali,25,Lahore,Male,90,Pakistan
1,Sara,30,Karachi,Female,95,Pakistan
3,Maya,28,Rawalpindi,Female,93,Pakistan
4,Ahmed,24,Peshawar,Male,97,Pakistan
5,Zara,27,Multan,Female,85,Pakistan
6,Tom,26,Quetta,Male,75,Pakistan
7,Jerry,23,Sialkot,Male,83,Pakistan


🔄 Reset and Rename

In [69]:
df.reset_index(drop=True)

Unnamed: 0,Name,Age,City,Gender,Marks,Country
0,Ali,25,Lahore,Male,90,Pakistan
1,Sara,30,Karachi,Female,95,Pakistan
2,John,22,Islamabad,Male,80,Pakistan
3,Maya,28,Rawalpindi,Female,93,Pakistan
4,Ahmed,24,Peshawar,Male,97,Pakistan
5,Zara,27,Multan,Female,85,Pakistan
6,Tom,26,Quetta,Male,75,Pakistan
7,Jerry,23,Sialkot,Male,83,Pakistan


In [70]:
df.rename(columns={'Marks': 'Score'})

Unnamed: 0,Name,Age,City,Gender,Score,Country
0,Ali,25,Lahore,Male,90,Pakistan
1,Sara,30,Karachi,Female,95,Pakistan
2,John,22,Islamabad,Male,80,Pakistan
3,Maya,28,Rawalpindi,Female,93,Pakistan
4,Ahmed,24,Peshawar,Male,97,Pakistan
5,Zara,27,Multan,Female,85,Pakistan
6,Tom,26,Quetta,Male,75,Pakistan
7,Jerry,23,Sialkot,Male,83,Pakistan


 Handling Missing Data

In [72]:
df.isnull()

Unnamed: 0,Name,Age,City,Gender,Marks,Country
0,False,False,False,False,False,False
1,False,False,False,False,False,False
2,False,False,False,False,False,False
3,False,False,False,False,False,False
4,False,False,False,False,False,False
5,False,False,False,False,False,False
6,False,False,False,False,False,False
7,False,False,False,False,False,False


In [73]:
df.isnull().sum()

Name       0
Age        0
City       0
Gender     0
Marks      0
Country    0
dtype: int64

In [74]:
# Drop missing values
df.dropna()


Unnamed: 0,Name,Age,City,Gender,Marks,Country
0,Ali,25,Lahore,Male,90,Pakistan
1,Sara,30,Karachi,Female,95,Pakistan
2,John,22,Islamabad,Male,80,Pakistan
3,Maya,28,Rawalpindi,Female,93,Pakistan
4,Ahmed,24,Peshawar,Male,97,Pakistan
5,Zara,27,Multan,Female,85,Pakistan
6,Tom,26,Quetta,Male,75,Pakistan
7,Jerry,23,Sialkot,Male,83,Pakistan


In [75]:
df['City'].unique()

array(['Lahore', 'Karachi', 'Islamabad', 'Rawalpindi', 'Peshawar',
       'Multan', 'Quetta', 'Sialkot'], dtype=object)

In [76]:
	df['City'].nunique()

8

In [77]:
df['City'].value_counts()

City
Lahore        1
Karachi       1
Islamabad     1
Rawalpindi    1
Peshawar      1
Multan        1
Quetta        1
Sialkot       1
Name: count, dtype: int64

In [78]:
df.head(7)

Unnamed: 0,Name,Age,City,Gender,Marks,Country
0,Ali,25,Lahore,Male,90,Pakistan
1,Sara,30,Karachi,Female,95,Pakistan
2,John,22,Islamabad,Male,80,Pakistan
3,Maya,28,Rawalpindi,Female,93,Pakistan
4,Ahmed,24,Peshawar,Male,97,Pakistan
5,Zara,27,Multan,Female,85,Pakistan
6,Tom,26,Quetta,Male,75,Pakistan
