# Some Useful Methods in Pandas

# Import pandas and numpy

In [3]:
import numpy as np
import pandas as pd

# Create a pandas Series object named 's' with four elements: [1, 2, 3, 4]. The elements should be associated with index labels 'a', 'b', 'c', and 'd', respectively. This means that each element should be accessed using its corresponding index label.

In [7]:
# A dict maps each index label straight to its value; insertion order is kept.
s = pd.Series({"a": 1, "b": 2, "c": 3, "d": 4})
s

a    1
b    2
c    3
d    4
dtype: int64

# Retrieve the value associated with the index label "a" from the Series 's'. In this case, it would return the value 1.

In [8]:
# Explicit label-based lookup; same result as s["a"].
s.loc["a"]

1

# Create a new Series object named 's2' by reindexing the original Series 's'. The elements of 's2' should be arranged according to the given index labels: "b", "d", "a", "c", and "e". If an index label is missing in the original Series, such as "e" in this case, the corresponding value in 's2' should be NaN (not a number) to indicate the missing value.

In [9]:
# "e" is not a label of s, so its slot is NaN and the result is float64.
s2 = s.reindex(index=list("bdace"))
s2

b    2.0
d    4.0
a    1.0
c    3.0
e    NaN
dtype: float64

# Create a pandas Series object named 's3' with three elements: ["Blue", "Yellow", "Purple"]. These elements should be associated with index labels [0, 2, 4], respectively. This means that each element should be accessed using its corresponding index label.

In [11]:
# Integer labels 0, 2, 4 leave gaps on purpose so a later reindex has holes to fill.
s3 = pd.Series({0: "Blue", 2: "Yellow", 4: "Purple"})
s3

0      Blue
2    Yellow
4    Purple
dtype: object

# Reindex the Series 's3' over the new index range 0 to 5 using the forward-fill (`ffill`) method: every index label that is missing from 's3' takes the value of the closest preceding label that is present.

In [12]:
# ffill = forward fill: a label missing from s3 takes the value of the
# closest preceding label that exists (1 <- 0, 3 <- 2, 5 <- 4).

s3.reindex(index=range(6), method="ffill")

0      Blue
1      Blue
2    Yellow
3    Yellow
4    Purple
5    Purple
dtype: object

# Create a pandas DataFrame object named 'df' with three rows and three columns. The values in the DataFrame are generated using NumPy's arange function to create an array of numbers from 0 to 8, which is then reshaped into a 3x3 matrix.

In [15]:
# The numbers 0..8 laid out row by row in a 3x3 grid; note there is no row "b".
df = pd.DataFrame(
    data=np.arange(9).reshape(3, -1),
    index=list("acd"),
    columns=["Tim", "Tom", "Kate"],
)
df

Unnamed: 0,Tim,Tom,Kate
a,0,1,2
c,3,4,5
d,6,7,8


# Create a new DataFrame named 'df2' by reindexing the original DataFrame 'df'. The rows of 'df2' should be arranged according to the given index labels: "d", "c", "b", and "a". Because "b" is not a row label of 'df', its row in 'df2' is filled with NaN.

In [16]:
# Row "b" does not exist in df, so it appears in df2 as an all-NaN row.
df2 = df.reindex(index=list("dcba"))
df2

Unnamed: 0,Tim,Tom,Kate
d,6.0,7.0,8.0
c,3.0,4.0,5.0
b,,,
a,0.0,1.0,2.0


# Create a new DataFrame by reindexing the columns of the original DataFrame 'df' based on the given list of column names ["Kate", "Tim", "Tom"].

In [17]:
# Desired column order; axis="columns" is the same as passing columns=names.
names = ["Kate", "Tim", "Tom"]
df.reindex(names, axis="columns")

Unnamed: 0,Kate,Tim,Tom
a,2,0,1
c,5,3,4
d,8,6,7


# Task

# Retrieve specific rows from the DataFrame 'df' based on the given row labels ["c", "d", "a"].

In [19]:
# Select rows by label in the requested order, keeping every column.
df.loc[["c", "d", "a"], :]

Unnamed: 0,Tim,Tom,Kate
c,3,4,5
d,6,7,8
a,0,1,2


# Create a pandas Series object named 's' with five elements generated using NumPy's arange function. The values in the Series range from 0.0 to 4.0. The elements are associated with index labels ["a", "b", "c", "d", "e"], respectively. This means that each element can be accessed using its corresponding index label. 

In [20]:
# Floats 0.0 through 4.0, labelled "a" through "e".
s = pd.Series(np.arange(5, dtype=float), index=list("abcde"))
s

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

# Create a new Series object named 'new_s' by removing the element associated with the index label "b" from the original Series 's'.

In [21]:
# drop returns a new Series; the original s is left untouched.
new_s = s.drop(index="b")
new_s

a    0.0
c    2.0
d    3.0
e    4.0
dtype: float64

# Task

# Create a new Series object by removing the elements associated with the index labels "c" and "d" from the original Series 's'.

In [22]:
# Several labels can be removed at once by passing a list.
s.drop(index=["c", "d"])

a    0.0
b    1.0
e    4.0
dtype: float64

# Create a pandas DataFrame object named 'data' with four rows and four columns. The values in the DataFrame should be generated using NumPy's arange function to create an array of numbers from 0 to 15, which then must be reshaped into a 4x4 matrix.

In [24]:
# The numbers 0..15 laid out row by row in a 4x4 grid: one row per person,
# columns A-D.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, -1),
    index=["Kate", "Tim", "Tom", "Alex"],
    columns=["A", "B", "C", "D"],
)
data

Unnamed: 0,A,B,C,D
Kate,0,1,2,3
Tim,4,5,6,7
Tom,8,9,10,11
Alex,12,13,14,15


# Create a new DataFrame by removing the rows associated with the index labels "Kate" and "Tim" from the original DataFrame 'data'.

In [26]:
# index=[...] names the row labels to remove; data itself is not modified.
data1 = data.drop(index=["Kate", "Tim"])
data1

Unnamed: 0,A,B,C,D
Tom,8,9,10,11
Alex,12,13,14,15


# Create a new DataFrame by removing the column associated with the label "A" from the original DataFrame 'data'.

In [29]:
# columns="A" is the keyword form of drop("A", axis=1).
data2 = data.drop(columns="A")
data2

Unnamed: 0,B,C,D
Kate,1,2,3
Tim,5,6,7
Tom,9,10,11
Alex,13,14,15


# Create a new DataFrame by removing the row associated with the index label "Kate" from the original DataFrame 'data'.

In [31]:
# index="Kate" is the keyword form of drop("Kate", axis=0).
data.drop(index="Kate")

Unnamed: 0,A,B,C,D
Tim,4,5,6,7
Tom,8,9,10,11
Alex,12,13,14,15


# Task

# Calculate the mean along the index (axis 0) of the DataFrame 'data', i.e. average down the rows to get one mean per column.

In [34]:
# axis="index" is the named form of axis=0: collapse the rows, one mean per column.
data.mean(axis="index")

A    6.0
B    7.0
C    8.0
D    9.0
dtype: float64

# Calculate the mean along the columns (axis 1) of the DataFrame 'data', i.e. average across the columns to get one mean per row.

In [35]:
# axis="columns" is the named form of axis=1: collapse the columns, one mean per row.
data.mean(axis="columns")

Kate     1.5
Tim      5.5
Tom      9.5
Alex    13.5
dtype: float64