In [1]:
import pandas as pd

# 1 Working With Series in pandas 

# Creating an empty Series:

In [2]:
pd.Series()

Series([], dtype: object)

# Creating series from data

In [3]:
import numpy as np

In [4]:
data = np.array([10,20,30])
data2 = np.array(['n','0','o','r'])

In [5]:
ser1 = pd.Series(data)
ser2 = pd.Series(data2)
print(ser1)
print(ser2)

0    10
1    20
2    30
dtype: int32
0    n
1    0
2    o
3    r
dtype: object


# Creating a series from array with an index:

In [6]:
mylist = np.array(['noor','john','karan','shazia'])

In [7]:
mylist

array(['noor', 'john', 'karan', 'shazia'], dtype='<U6')

In [8]:
ser_mylist  = pd.Series(data=mylist,index=[10,20,30,40])

In [9]:
ser_mylist

10      noor
20      john
30     karan
40    shazia
dtype: object

# Creating a Series from list

In [10]:
sub_ka_list = [10,20,30,40,50]
pd.Series(sub_ka_list)

0    10
1    20
2    30
3    40
4    50
dtype: int64

# Creating a Series from Dictionary

In [11]:
mydict = {
    'a':'apple',
    'b':'ball',
    'c':'cat',
    'd':'dog'
}

In [12]:
pd.Series(mydict)

a    apple
b     ball
c      cat
d      dog
dtype: object

# Creating a series from Scalar value:

In [13]:
pd.Series(10,index=[0,1,2,3,4,5])

0    10
1    10
2    10
3    10
4    10
5    10
dtype: int64

# Creating a series with Numpy Functions

In [14]:
pd.Series(np.linspace(0,100,10))

0      0.000000
1     11.111111
2     22.222222
3     33.333333
4     44.444444
5     55.555556
6     66.666667
7     77.777778
8     88.888889
9    100.000000
dtype: float64

# Creating a Series using range function:

In [15]:
pd.Series(range(10))

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

# Creating a Series using for loop and list comprehension:

In [16]:
pd.Series(range(1,5),index=[x for x in 'abcd'])

a    1
b    2
c    3
d    4
dtype: int64

# Creating a Series using mathematical expressions:

In [17]:
ser = np.arange(10,15)
pd.Series(data=ser*5)

0    50
1    55
2    60
3    65
4    70
dtype: int32

# Series Working with Real Dataset

In [18]:
df = pd.read_excel('VariableWithIDs.xlsx')

In [19]:
df.head(1)

Unnamed: 0,Crop Name_ID,Crop Name,Cotton Varity type_ID,Cotton Varity type,Crop Seed Variety_ID,Crop Seed Variety,Field level_ID,Field level,Crop Health_ID,Crop Health,...,Soil Type_ID,Soil Type,Unnamed: 34,Landuse_ID,Landuse,Type,Unnamed: 38,Landuse.1,Type.1,Type_ID
0,1.0,Sugarcane,1.0,Desi,1,MNH-1050,1.0,Levelled,1.0,Healthy,...,1.0,Clay soil,,1.0,Builtup,Residential,,Sugarcane,Agricultural,


In [20]:
var_series = df['Crop Name']

In [21]:
var_series

0     Sugarcane
1          Rice
2        Cotton
3         Bajra
4         Jowar
        ...    
65          NaN
66          NaN
67          NaN
68          NaN
69          NaN
Name: Crop Name, Length: 70, dtype: object

In [22]:
# Basic operations on Series
print(var_series.head())  # Display the first few elements of the Series
print(var_series.describe())  # Generate descriptive statistics of the Series
print(var_series.value_counts())  # Count the occurrences of unique values in the Series

0    Sugarcane
1         Rice
2       Cotton
3        Bajra
4        Jowar
Name: Crop Name, dtype: object
count            24
unique           24
top       Sugarcane
freq              1
Name: Crop Name, dtype: object
Crop Name
Sugarcane           1
Rice                1
SandDunes           1
Pond                1
Fish Farm           1
Desert              1
Canal               1
Builtup             1
Others              1
Pomegranate         1
Mango Orchard       1
Fooder              1
Sunflower           1
Safflower           1
Soyabean            1
Groundnut           1
Chillies            1
Turmeric (Haldi)    1
Seasamum            1
Maize               1
Jowar               1
Bajra               1
Cotton              1
StagnantWater       1
Name: count, dtype: int64


In [23]:
# Filtering data using Series
var_series[var_series == 'Maize']  # Filter values 

5    Maize
Name: Crop Name, dtype: object

In [24]:
# Applying functions to Series
def custom_function():
    # Apply some custom logic
    for var in var_series:
        print(var)

custom_function()

Sugarcane
Rice
Cotton
Bajra
Jowar
Maize
Seasamum
Turmeric (Haldi)
Chillies
Groundnut
Soyabean
Safflower
Sunflower
Fooder
Mango Orchard
Pomegranate
Others
Builtup
Canal
Desert
Fish Farm
Pond
SandDunes
StagnantWater
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan


In [25]:
# Sorting Series
var = pd.Series(data=[1,2,3,4,5])
var.sort_values(ascending=False)  # Sort the Series in descending order

4    5
3    4
2    3
1    2
0    1
dtype: int64

In [26]:
# drop nan values
var_series.dropna()

0            Sugarcane
1                 Rice
2               Cotton
3                Bajra
4                Jowar
5                Maize
6             Seasamum
7     Turmeric (Haldi)
8             Chillies
9            Groundnut
10            Soyabean
11           Safflower
12           Sunflower
13              Fooder
14       Mango Orchard
15         Pomegranate
16              Others
17             Builtup
18               Canal
19              Desert
20           Fish Farm
21                Pond
22           SandDunes
23       StagnantWater
Name: Crop Name, dtype: object

In [27]:
# Modifying Series values
var = pd.Series([1,2,3])
var.replace({1:10,2:20})

0    10
1    20
2     3
dtype: int64

In [28]:
# Concatenating Series
series1 = pd.Series([1, 2, 3])
series2 = pd.Series([4, 5, 6])
pd.concat([series1, series2])  # Concatenate two Series

0    1
1    2
2    3
0    4
1    5
2    6
dtype: int64

In [29]:
# Other useful Series functions
column_series = pd.Series([10,2,3,4,5,10,10])
print(" unique :\n",column_series.unique())  # Get unique values in the Series
print(" largest val :\n ", column_series.nlargest(2))  # Get the 5 largest values in the Series
print(" max index : \n ",column_series.idxmax()) # Get the index of the maximum value in the Series

 unique :
 [10  2  3  4  5]
 largest val :
  0    10
5    10
dtype: int64
 max index : 
  0


# 2 Working With Dataframe

# Creating a Pandas DataFrame

In [30]:
mydata = ['a','b','c','d','e','f']

In [32]:
pd.DataFrame(data=mydata,columns=['letters'])

Unnamed: 0,letters
0,a
1,b
2,c
3,d
4,e
5,f


# Creating DataFrame from dict of ndarray/lists:

In [36]:
mydict = {
    'name':['noor','raza','shazia'],
    'Age':[10,20,30],
    'gender':['male','male','female']
}
pd.DataFrame(mydict)

Unnamed: 0,name,Age,gender
0,noor,10,male
1,raza,20,male
2,shazia,30,female


# Working with real Dataset

In [39]:
df.head(1)

Unnamed: 0,Crop Name_ID,Crop Name,Cotton Varity type_ID,Cotton Varity type,Crop Seed Variety_ID,Crop Seed Variety,Field level_ID,Field level,Crop Health_ID,Crop Health,...,Soil Type_ID,Soil Type,Unnamed: 34,Landuse_ID,Landuse,Type,Unnamed: 38,Landuse.1,Type.1,Type_ID
0,1.0,Sugarcane,1.0,Desi,1,MNH-1050,1.0,Levelled,1.0,Healthy,...,1.0,Clay soil,,1.0,Builtup,Residential,,Sugarcane,Agricultural,


In [45]:
# Accessing columns in DataFrame
column_series = df['Crop Health']  # Extract a single column as a Series
column_list = df['Crop Name'].tolist()  # Convert a column to a list
subset_df = df[['Field level_ID', 'Field level']]  # Extract a subset of columns as a new DataFrame

In [47]:
# Basic operations on DataFrame
df.shape # Get the dimensions of the DataFrame
df.describe()  # Generate descriptive statistics of the DataFrame
df.info()  # Get information about the DataFrame

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70 entries, 0 to 69
Data columns (total 42 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Crop Name_ID            26 non-null     float64
 1   Crop Name               24 non-null     object 
 2   Cotton Varity type_ID   2 non-null      float64
 3   Cotton Varity type      2 non-null      object 
 4   Crop Seed Variety_ID    70 non-null     int64  
 5   Crop Seed Variety       70 non-null     object 
 6   Field level_ID          2 non-null      float64
 7   Field level             2 non-null      object 
 8   Crop Health_ID          3 non-null      float64
 9   Crop Health             3 non-null      object 
 10  Unhealthy Condtions_ID  8 non-null      float64
 11  Unhealthy Condtions     8 non-null      object 
 12  Pest_ID                 2 non-null      float64
 13  Pest                    2 non-null      object 
 14  Pest name_ID            30 non-null     floa

In [48]:
df.head(1)

Unnamed: 0,Crop Name_ID,Crop Name,Cotton Varity type_ID,Cotton Varity type,Crop Seed Variety_ID,Crop Seed Variety,Field level_ID,Field level,Crop Health_ID,Crop Health,...,Soil Type_ID,Soil Type,Unnamed: 34,Landuse_ID,Landuse,Type,Unnamed: 38,Landuse.1,Type.1,Type_ID
0,1.0,Sugarcane,1.0,Desi,1,MNH-1050,1.0,Levelled,1.0,Healthy,...,1.0,Clay soil,,1.0,Builtup,Residential,,Sugarcane,Agricultural,


In [50]:
# Filtering data using DataFrame
filtered_df = df[df['Crop Name'] == 'Sugarcane']  # Filter rows based on a condition
filtered_df.head()

Unnamed: 0,Crop Name_ID,Crop Name,Cotton Varity type_ID,Cotton Varity type,Crop Seed Variety_ID,Crop Seed Variety,Field level_ID,Field level,Crop Health_ID,Crop Health,...,Soil Type_ID,Soil Type,Unnamed: 34,Landuse_ID,Landuse,Type,Unnamed: 38,Landuse.1,Type.1,Type_ID
0,1.0,Sugarcane,1.0,Desi,1,MNH-1050,1.0,Levelled,1.0,Healthy,...,1.0,Clay soil,,1.0,Builtup,Residential,,Sugarcane,Agricultural,


In [52]:
# Modifying DataFrame
df['new_column'] = df['Crop Name'] + df['Crop Health']  # Create a new column based on existing columns
df.dropna()  # Remove rows with missing values
df.drop('Crop Name', axis=1, inplace=True)  # Drop a column from the DataFrame

In [53]:
# Sorting DataFrame
sorted_df = df.sort_values(by='new_column', ascending=False) 

In [55]:
df.head(2)

Unnamed: 0,Crop Name_ID,Cotton Varity type_ID,Cotton Varity type,Crop Seed Variety_ID,Crop Seed Variety,Field level_ID,Field level,Crop Health_ID,Crop Health,Unhealthy Condtions_ID,...,Soil Type,Unnamed: 34,Landuse_ID,Landuse,Type,Unnamed: 38,Landuse.1,Type.1,Type_ID,new_column
0,1.0,1.0,Desi,1,MNH-1050,1.0,Levelled,1.0,Healthy,1.0,...,Clay soil,,1.0,Builtup,Residential,,Sugarcane,Agricultural,,SugarcaneHealthy
1,2.0,2.0,American,2,MNH-1035,2.0,Not Levelled,2.0,Partially healthy,2.0,...,Loamy soil,,2.0,"Canal,Fish Farm,Pond,StagnantWater",Water Bodies,,Rice,Agricultural,,RicePartially healthy


In [56]:
# Accessing values in DataFrame
df.at[1, 'Cotton Varity type']  # Access a specific value by row index and column name

'American'

In [57]:
# Iterating over DataFrame
for index, row in df.iterrows():
    # Iterate over each row in the DataFrame
    print(f"Index: {index}, Row: {row}")

Index: 0, Row: Crop Name_ID                              1.0
Cotton Varity type_ID                     1.0
Cotton Varity type                       Desi
Crop Seed Variety_ID                        1
Crop Seed Variety                    MNH-1050
Field level_ID                            1.0
Field level                          Levelled
Crop Health_ID                            1.0
Crop Health                           Healthy
Unhealthy Condtions_ID                    1.0
Unhealthy Condtions       No visible symptoms
Pest_ID                                   1.0
Pest                                      Yes
Pest name_ID                              1.0
Pest name                     Cotton bollworm
Disease_ID                                1.0
Disease                                   Yes
Disease Type_ID                           1.0
Disease Type              Twig and Stem bligh
Fertilizer Used_ID                        1.0
Fertilizer Used                          Urea
Fertilizer elements

In [58]:
# Other useful DataFrame functions
df.isnull()  # Check for missing values in the DataFrame

Unnamed: 0,Crop Name_ID,Cotton Varity type_ID,Cotton Varity type,Crop Seed Variety_ID,Crop Seed Variety,Field level_ID,Field level,Crop Health_ID,Crop Health,Unhealthy Condtions_ID,...,Soil Type,Unnamed: 34,Landuse_ID,Landuse,Type,Unnamed: 38,Landuse.1,Type.1,Type_ID,new_column
0,False,False,False,False,False,False,False,False,False,False,...,False,True,False,False,False,True,False,False,True,False
1,False,False,False,False,False,False,False,False,False,False,...,False,True,False,False,False,True,False,False,True,False
2,False,True,True,False,False,True,True,False,False,False,...,False,True,False,False,False,True,False,False,True,False
3,False,True,True,False,False,True,True,True,True,False,...,False,True,False,False,False,True,False,False,True,True
4,False,True,True,False,False,True,True,True,True,False,...,False,True,False,True,True,True,False,False,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,True,True,True,False,False,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
66,True,True,True,False,False,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
67,True,True,True,False,False,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
68,True,True,True,False,False,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
