***DataFrame Basics:***
    
    A 2-dimensional labeled data structure with columns of potentially different data types. It's akin to a spreadsheet or SQL table.

    


***1. Creating a dataframe:***

In [35]:
import pandas as pd
import numpy as np

## Creating a DataFrame from a dictionary
data = {'Animal': ['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'],
        'Age': [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3],
        'Visits': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'Priority': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']}

#creates a DataFrame 'df' from a dictionary 'data' 
df= pd.DataFrame(data)



In [None]:
#print - Display a summary of the basic information about this DataFrame and its data.
#print(df)

print("\n\nDataFrame:\n\n")
df

***2. Basic DataFrame Operations***

***A. Information and Description:***

In [None]:
# Displaying basic information about the DataFrame
print("\nInformation about DataFrame:\n\n")
df.info()
#print(df.info())


In [32]:
# Summary statistics for numerical columns
print("\nSummary statistics:\n\n")
df.describe()
#print(df.describe())


Summary statistics:




Unnamed: 0,Age,Visits
count,8.0,10.0
mean,3.4375,1.9
std,2.007797,0.875595
min,0.5,1.0
25%,2.375,1.0
50%,3.0,2.0
75%,4.625,2.75
max,7.0,3.0


In [40]:
# Checking how many Rows and Columns (rows, columns)
df.shape

(10, 4)

In [59]:
# Displaying Column Names 
df.columns

Index(['Animal', 'Age', 'Visits', 'Priority'], dtype='object')

In [44]:
df.head()

Unnamed: 0,Animal,Age,Visits,Priority
0,cat,2.5,1,yes
1,cat,3.0,3,yes
2,snake,0.5,2,no
3,dog,,3,yes
4,dog,5.0,2,no


In [45]:
df.tail()

Unnamed: 0,Animal,Age,Visits,Priority
5,cat,2.0,3,no
6,snake,4.5,1,no
7,cat,,1,yes
8,dog,7.0,2,no
9,dog,3.0,1,no


In [47]:
df.loc[:]

Unnamed: 0,Animal,Age,Visits,Priority
0,cat,2.5,1,yes
1,cat,3.0,3,yes
2,snake,0.5,2,no
3,dog,,3,yes
4,dog,5.0,2,no
5,cat,2.0,3,no
6,snake,4.5,1,no
7,cat,,1,yes
8,dog,7.0,2,no
9,dog,3.0,1,no


In [138]:
df.iloc[:]

Unnamed: 0,Animal,Age,Visits,Priority
0,cat,2.5,1,yes
1,cat,3.0,3,yes
2,snake,0.5,2,no
3,dog,,3,yes
4,dog,5.0,2,no
5,cat,2.0,3,no
6,snake,4.5,1,no
7,cat,,1,yes
8,dog,7.0,2,no
9,dog,3.0,1,no


***B. Adding and Deleting Columns:***

In [125]:
# Adding a new column
df['Country'] = 'Nepal'
print("\nDataFrame after adding 'Country' column:\n")
df


DataFrame after adding 'Country' column:



Unnamed: 0,Animal,Age,Visits,Priority,Country
0,cat,2.5,1,yes,Nepal
1,cat,3.0,3,yes,Nepal
2,snake,0.5,2,no,Nepal
3,dog,,3,yes,Nepal
4,dog,5.0,2,no,Nepal
5,cat,2.0,3,no,Nepal
6,snake,4.5,1,no,Nepal
7,cat,,1,yes,Nepal
8,dog,7.0,2,no,Nepal
9,dog,3.0,1,no,Nepal


In [126]:
# Deleting a column
df.drop('Country', axis=1, inplace=True)
print("\nDataFrame after deleting 'Country' column:\n")
print(df)


DataFrame after deleting 'Country' column:

  Animal  Age  Visits Priority
0    cat  2.5       1      yes
1    cat  3.0       3      yes
2  snake  0.5       2       no
3    dog  NaN       3      yes
4    dog  5.0       2       no
5    cat  2.0       3       no
6  snake  4.5       1       no
7    cat  NaN       1      yes
8    dog  7.0       2       no
9    dog  3.0       1       no


***C. Accessing Elements in DataFrame***

1. Accessing specific columns


In [74]:
# Accessing specific columns
print("\nAccessing a specific column:")
df['Animal']



Accessing a specific column:


0      cat
1      cat
2    snake
3      dog
4      dog
5      cat
6    snake
7      cat
8      dog
9      dog
Name: Animal, dtype: object

In [96]:
# Accessing specific rows using loc (label-based)
print("\nAccessing a specific row using loc:")

#df.loc[0]  # Accessing the row with label/index 0
#df.loc[1]  # Accessing the row with label/index 1
#df.loc[2]  # Accessing the row with label/index 2
#df.loc[3]  # Accessing the row with label/index 3
df.loc[5]  # Accessing the row with label/index 4 and so on



Accessing a specific row using loc:


Animal      cat
Age         2.0
Visits        3
Priority     no
Name: 5, dtype: object

In [87]:

# Accessing specific elements using at (label-based)
print("\nAccessing a specific element using at:\n")

print("1.", df.at[0, 'Animal'])  # Accessing the value in row 0, column 'Animal'
print("1.", df.at[0, 'Age'])  # Accessing the value in row 0, column 'Age'
print("2.", df.at[1, 'Animal'])  # Accessing the value in row 1, column 'Animal'
print("3.", df.at[2, 'Animal'])  # Accessing the value in row 2, column 'Animal'
print("4.", df.at[3, 'Animal'])  # Accessing the value in row 3, column 'Animal'
print("5.", df.at[4, 'Animal'])  # Accessing the value in row 4, column 'Animal' and so on


Accessing a specific element using at:

1. cat
1. 2.5
2. cat
3. snake
4. dog
5. dog


2. Return the ***first 4 rows*** of the DataFrame df.

In [52]:
df.head(4)

Unnamed: 0,Animal,Age,Visits,Priority
0,cat,2.5,1,yes
1,cat,3.0,3,yes
2,snake,0.5,2,no
3,dog,,3,yes


In [53]:
df.loc["0":"3"]

Unnamed: 0,Animal,Age,Visits,Priority
0,cat,2.5,1,yes
1,cat,3.0,3,yes
2,snake,0.5,2,no
3,dog,,3,yes


In [54]:
df.iloc[:4]

Unnamed: 0,Animal,Age,Visits,Priority
0,cat,2.5,1,yes
1,cat,3.0,3,yes
2,snake,0.5,2,no
3,dog,,3,yes


3. Select just the ***'Animal' and 'Age'*** columns from the DataFrame df.

In [98]:
#df[["Animal", "Age"]]
df[["Animal", "Visits"]]

Unnamed: 0,Animal,Visits
0,cat,1
1,cat,3
2,snake,2
3,dog,3
4,dog,2
5,cat,3
6,snake,1
7,cat,1
8,dog,2
9,dog,1


In [97]:
df.columns

Index(['Animal', 'Age', 'Visits', 'Priority'], dtype='object')

In [133]:
df.iloc[:,:2]
#df.iloc[:,0:3]
#df.iloc[:,1:3]

Unnamed: 0,Animal,Age
0,cat,2.5
1,cat,3.0
2,snake,0.5
3,dog,
4,dog,5.0
5,cat,2.0
6,snake,4.5
7,cat,
8,dog,7.0
9,dog,3.0


4.  Select the data in ***rows [2, 4, 7]*** and in ***columns ['animal', 'age']***.

In [106]:
df.iloc[[2,4,7],:2]

Unnamed: 0,Animal,Age
2,snake,0.5
4,dog,5.0
7,cat,


5. Filtering and Sorting Data:

In [148]:
# Filtering data based on a condition
filtered_data1 = df[df['Age'] > 2]
print("\nFiltered data based on age > 2:\n")
#print(filtered_data1)
filtered_data1


Filtered data based on age > 2:



Unnamed: 0,Animal,Age,Visits,Priority
0,cat,2.5,1,yes
1,cat,3.0,3,yes
4,dog,5.0,2,no
6,snake,4.5,1,no
8,dog,7.0,2,no
9,dog,3.0,1,no


In [150]:

#Select only the rows where the number of visits is greater than 1.
#df[df["Visits"]>1]
filtered_data2 = df[df["Visits"]>1]
print("\nFiltered data based on Visits > 1:\n")

#print(filtered_data2)
filtered_data2



Filtered data based on Visits > 1:



Unnamed: 0,Animal,Age,Visits,Priority
1,cat,3.0,3,yes
2,snake,0.5,2,no
3,dog,,3,yes
4,dog,5.0,2,no
5,cat,2.0,3,no
8,dog,7.0,2,no


In [151]:

#Select only the rows where the number of visits is less or equal to 1.
print("\nFiltered data based on Visits <= 1:\n")
#df[df["Visits"]<=1]
filtered_data3 = df[df["Visits"]<=1]

#print(filtered_data3)
filtered_data3


Filtered data based on Visits <= 1:



Unnamed: 0,Animal,Age,Visits,Priority
0,cat,2.5,1,yes
6,snake,4.5,1,no
7,cat,,1,yes
9,dog,3.0,1,no


In [153]:
# Sorting data
sorted_data1 = df.sort_values(by='Age', ascending=True)
print("\nSorted data based on age (ascending order):")
sorted_data1


Sorted data based on age (ascending order):


Unnamed: 0,Animal,Age,Visits,Priority
2,snake,0.5,2,no
5,cat,2.0,3,no
0,cat,2.5,1,yes
1,cat,3.0,3,yes
9,dog,3.0,1,no
6,snake,4.5,1,no
4,dog,5.0,2,no
8,dog,7.0,2,no
3,dog,,3,yes
7,cat,,1,yes


In [152]:
# Sorting data
sorted_data2 = df.sort_values(by='Age', ascending=False)
print("\nSorted data based on age (descending order):")
sorted_data2


Sorted data based on age (descending order):


Unnamed: 0,Animal,Age,Visits,Priority
8,dog,7.0,2,no
4,dog,5.0,2,no
6,snake,4.5,1,no
1,cat,3.0,3,yes
9,dog,3.0,1,no
0,cat,2.5,1,yes
5,cat,2.0,3,no
2,snake,0.5,2,no
3,dog,,3,yes
7,cat,,1,yes


5. Select the rows where the age is missing, i.e. is NaN.

In [157]:
df[df["Age"].isnull()]

#df[df["Animal"].isnull()]

Unnamed: 0,Animal,Age,Visits,Priority
3,dog,,3,yes
7,cat,,1,yes


6. Select the rows where the animal is a cat and the age is less than 5.

In [165]:
df[(df["Animal"] == "cat") & (df["Age"] < 5)]

Unnamed: 0,Animal,Age,Visits,Priority
0,cat,2.5,1,yes
1,cat,3.0,3,yes
5,cat,2.0,3,no


7. Select the rows the age is between 2 and 4 (inclusive).

In [166]:
df[df["Age"]. between(2,4)]

Unnamed: 0,Animal,Age,Visits,Priority
0,cat,2.5,1,yes
1,cat,3.0,3,yes
5,cat,2.0,3,no
9,dog,3.0,1,no


8. Change the age in row '1' to 1.5.

In [198]:
#df.loc['1','Age'] = 1.5
#df.drop('1', inplace =True)
df.drop('5', inplace =True)
df
#new_data= pd.DataFrame(data) #just retriving original data 
#new_data

Unnamed: 0,Animal,Age,Visits,Priority
0,cat,2.5,1.0,yes
1,cat,3.0,3.0,yes
2,snake,0.5,2.0,no
3,dog,,3.0,yes
4,dog,5.0,2.0,no
5,cat,2.0,3.0,no
6,snake,4.5,1.0,no
7,cat,,1.0,yes
8,dog,7.0,2.0,no
9,dog,3.0,1.0,no


9. Calculate the sum of Age and Visits (the total number of Age and Visits)

In [172]:
df["Age"].sum()

30.5

In [174]:
df["Visits"].sum()

19.0

10. Calculate the mean age for each different animal in df

In [178]:
df.groupby("Animal")["Age"].mean()

Animal
cat      2.5
dog      5.0
snake    2.5
Name: Age, dtype: float64

In [181]:
df.groupby("Animal")["Visits"].mean()

Animal
cat      2.0
dog      2.0
snake    1.5
Name: Visits, dtype: float64

11. Append a new row '10' to df with your choice of values for each column. Then delete that row to return the original DataFrame.

In [206]:

#Append a new row '10'
df.loc["10"] = ["dog", 11, 4, "yes"]
df

Unnamed: 0,Animal,Age,Visits,Priority
0,cat,2.5,1.0,yes
1,cat,3.0,3.0,yes
2,snake,0.5,2.0,no
3,dog,,3.0,yes
4,dog,5.0,2.0,no
5,cat,2.0,3.0,no
6,snake,4.5,1.0,no
7,cat,,1.0,yes
8,dog,7.0,2.0,no
9,dog,3.0,1.0,no


In [207]:
#delete that row to return the original DataFrame
df.drop('10', inplace = True)
df

Unnamed: 0,Animal,Age,Visits,Priority
0,cat,2.5,1.0,yes
1,cat,3.0,3.0,yes
2,snake,0.5,2.0,no
3,dog,,3.0,yes
4,dog,5.0,2.0,no
5,cat,2.0,3.0,no
6,snake,4.5,1.0,no
7,cat,,1.0,yes
8,dog,7.0,2.0,no
9,dog,3.0,1.0,no


12. Count the number of each type of animal in df

In [209]:
df['Animal'].value_counts()

Animal
cat      4
dog      4
snake    2
Name: count, dtype: int64

In [210]:
df['Priority'].value_counts()

Priority
no     6
yes    4
Name: count, dtype: int64

In [211]:
df['Age'].value_counts()

Age
3.0    2
2.5    1
0.5    1
5.0    1
2.0    1
4.5    1
7.0    1
Name: count, dtype: int64

In [212]:
df['Visits'].value_counts()

Visits
1.0    4
3.0    3
2.0    3
Name: count, dtype: int64

13. Sort df first by the values in the 'Age' in decending order, then by the value in the 'Visits' column in ascending order.

In [213]:
df.sort_values(by=['Age', 'Visits'], ascending = [False, True])

Unnamed: 0,Animal,Age,Visits,Priority
8,dog,7.0,2.0,no
4,dog,5.0,2.0,no
6,snake,4.5,1.0,no
9,dog,3.0,1.0,no
1,cat,3.0,3.0,yes
0,cat,2.5,1.0,yes
5,cat,2.0,3.0,no
2,snake,0.5,2.0,no
7,cat,,1.0,yes
3,dog,,3.0,yes


14. In the 'animal' column, change the 'snake' entries to 'python'.

In [215]:
df['Animal'] = df['Animal'].replace('snake', 'python')
df

Unnamed: 0,Animal,Age,Visits,Priority
0,cat,2.5,1.0,yes
1,cat,3.0,3.0,yes
2,python,0.5,2.0,no
3,dog,,3.0,yes
4,dog,5.0,2.0,no
5,cat,2.0,3.0,no
6,python,4.5,1.0,no
7,cat,,1.0,yes
8,dog,7.0,2.0,no
9,dog,3.0,1.0,no
