# Pandas DataFrame: a special object for storing data in tabular form (rows and columns)

In [1]:
!pip install pandas



In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build the raw data for a DataFrame: three columns of five random marks each.
# A seeded generator makes the values identical on every "Restart & Run All".
SEED = 42
rng = np.random.default_rng(SEED)

user_data = {
    "MarksA": rng.integers(1, 100, 5),   # 5 integers in [1, 100) - upper bound is exclusive
    "MarksB": rng.integers(50, 100, 5),  # 5 integers in [50, 100)
    "MarksC": rng.integers(1, 100, 5),   # 5 integers in [1, 100)
}

print(user_data)

{'MarksA': array([14, 51, 73, 76, 35]), 'MarksB': array([58, 81, 60, 86, 80]), 'MarksC': array([84, 59, 91, 96, 52])}


In [3]:
# Build the DataFrame from the dict of arrays; dtype casts the integer marks
# to 32-bit floats in every column.
df = pd.DataFrame(data=user_data, dtype="float32")
print(df)

   MarksA  MarksB  MarksC
0    14.0    58.0    84.0
1    51.0    81.0    59.0
2    73.0    60.0    91.0
3    76.0    86.0    96.0
4    35.0    80.0    52.0


In [4]:
# head() returns the first five rows unless a different count is passed.
print(df.head(n=5))
# Ask for only the first three rows.
df.head(3)

   MarksA  MarksB  MarksC
0    14.0    58.0    84.0
1    51.0    81.0    59.0
2    73.0    60.0    91.0
3    76.0    86.0    96.0
4    35.0    80.0    52.0


Unnamed: 0,MarksA,MarksB,MarksC
0,14.0,58.0,84.0
1,51.0,81.0,59.0
2,73.0,60.0,91.0


In [5]:
# Column labels (the header row), returned as a pandas Index
df.columns

Index(['MarksA', 'MarksB', 'MarksC'], dtype='object')

# Creating CSV from the dataframe

In [6]:
# Write the frame to marks.csv. index=False is not passed, so the row index is
# written as well and appears as an extra "Unnamed: 0" column when the file is read back.
df.to_csv('marks.csv')

In [7]:
# Read the CSV back; the index that to_csv wrote shows up as an extra
# "Unnamed: 0" column.
my_data = pd.read_csv('marks.csv')
print(my_data)
# Drop that extra column and show the cleaned frame.
my_data = my_data.drop('Unnamed: 0', axis=1)
print(my_data)

   Unnamed: 0  MarksA  MarksB  MarksC
0           0    14.0    58.0    84.0
1           1    51.0    81.0    59.0
2           2    73.0    60.0    91.0
3           3    76.0    86.0    96.0
4           4    35.0    80.0    52.0
   MarksA  MarksB  MarksC
0    14.0    58.0    84.0
1    51.0    81.0    59.0
2    73.0    60.0    91.0
3    76.0    86.0    96.0
4    35.0    80.0    52.0


In [8]:
# Summary statistics for each column: count, mean, std, min, quartiles and max
my_data.describe()

Unnamed: 0,MarksA,MarksB,MarksC
count,5.0,5.0,5.0
mean,49.8,73.0,76.4
std,26.109385,13.0,19.705329
min,14.0,58.0,52.0
25%,35.0,60.0,59.0
50%,51.0,80.0,84.0
75%,73.0,81.0,91.0
max,76.0,86.0,96.0


In [10]:
# tail() is the counterpart of head(): n=3 returns the last three rows
my_data.tail(n=3)

Unnamed: 0,MarksA,MarksB,MarksC
2,73.0,60.0,91.0
3,76.0,86.0,96.0
4,35.0,80.0,52.0


In [12]:
# Select one row by integer position. Positions start at 0, so iloc[3] is the
# 4th row (the one labelled 3), returned as a Series.
df.iloc[3] 

MarksA    76.0
MarksB    86.0
MarksC    96.0
Name: 3, dtype: float32

In [15]:
# Single element: row at position 3 (the 4th row), column at position 1
# (the 2nd column, MarksB).
print(df.iloc[3, 1])
# Same element in two steps: take the row first, then index into that Series.
# .iloc keeps the second lookup positional; a plain [1] on a Series with string
# labels relies on a deprecated positional fallback.
print(df.iloc[3].iloc[1])
# If only the column name is known, look up its integer position first.
idx = df.columns.get_loc('MarksB')
print(idx)
print(df.iloc[3, idx])

86.0
86.0
1
86.0


In [16]:
# Several columns at once: collect the position of each column name in a list
# and pass that list as the column selector.
idx = [df.columns.get_loc(name) for name in ('MarksB', 'MarksC')]
print(idx)
print(df.iloc[3, idx])

[1, 2]
MarksB    86.0
MarksC    96.0
Name: 3, dtype: float32


In [17]:
# Show the frame in its original row order, before sorting it by marks
print(my_data)


   MarksA  MarksB  MarksC
0    14.0    58.0    84.0
1    51.0    81.0    59.0
2    73.0    60.0    91.0
3    76.0    86.0    96.0
4    35.0    80.0    52.0


In [20]:
# Sort the rows by MarksA, lowest first. Pass ascending=False for descending order.
# The sorted frame is displayed here, not assigned back to my_data.
my_data.sort_values(by="MarksA", ascending=True)

Unnamed: 0,MarksA,MarksB,MarksC
0,14.0,58.0,84.0
4,35.0,80.0,52.0
1,51.0,81.0,59.0
2,73.0,60.0,91.0
3,76.0,86.0,96.0


In [21]:
# Convert the DataFrame to a plain 2-D NumPy array. to_numpy() is the accessor
# the pandas documentation recommends over the older .values attribute.
data_array = my_data.to_numpy()
print(type(data_array))
print(data_array)
print(data_array.shape)  # (rows, columns)

<class 'numpy.ndarray'>
[[14. 58. 84.]
 [51. 81. 59.]
 [73. 60. 91.]
 [76. 86. 96.]
 [35. 80. 52.]]
(5, 3)


In [22]:
# A NumPy array can be converted back into a DataFrame. The array carries no
# column names, so they are supplied here, and the values are cast to int32.
new_df = pd.DataFrame(
    data=data_array,
    columns=["Physics", "Chem", "Maths"],
    dtype="int32",
)
print(new_df)

   Physics  Chem  Maths
0       14    58     84
1       51    81     59
2       73    60     91
3       76    86     96
4       35    80     52


In [23]:
# Save the frame as a CSV file; index=False leaves the row index out of the file
new_df.to_csv("PCM.csv",index=False)

In [24]:
# Read PCM.csv back and print it; the file was written with index=False
pcm = pd.read_csv('PCM.csv')
print(pcm)

   Physics  Chem  Maths
0       14    58     84
1       51    81     59
2       73    60     91
3       76    86     96
4       35    80     52
