In [2]:
import pandas as pd
import numpy as np

# pandas provides two core data structures: Series (1-D) and DataFrame (2-D)

In [3]:
# Series: a one-dimensional array of values paired with an index of labels.

# np.arange(10, 20, 3.5) steps from 10 in increments of 3.5 and stops before 20,
# giving three values: 10.0, 13.5, 17.0.
array = np.arange(10, 20, 3.5)
series = pd.Series(data=array)
print(series)

# Elements can be looked up by index label; with the default 0..n-1 index,
# label 2 is the third element.
print("index 2: ", series[2])

# Replace the Series with five random floats drawn from [0, 1).
series = pd.Series(data=np.random.rand(5))
print(series)

# The index labels can be swapped out by assigning a new list of the same length.
series.index = list(range(10, 15))
series

0    10.0
1    13.5
2    17.0
dtype: float64
index 2:  17.0
0    0.874679
1    0.425670
2    0.251775
3    0.839194
4    0.551467
dtype: float64


10    0.874679
11    0.425670
12    0.251775
13    0.839194
14    0.551467
dtype: float64

In [4]:
# Building a DataFrame out of several Series.

# Three independent Series of four random floats each, created in order s1, s2, s3.
s1, s2, s3 = (pd.Series(np.random.rand(4)) for _ in range(3))

# A list of Series is stacked row-wise: one row per Series (3 rows x 4 columns).
df = pd.DataFrame(data=[s1, s2, s3])
print(df)

# Transposing swaps rows and columns, so each Series becomes a column
# (4 rows x 3 columns).
df = df.transpose()
print(df)

          0         1         2         3
0  0.619833  0.650228  0.980229  0.878076
1  0.061046  0.323581  0.909017  0.072995
2  0.786499  0.631667  0.110123  0.645436
          0         1         2
0  0.619833  0.061046  0.786499
1  0.650228  0.323581  0.631667
2  0.980229  0.909017  0.110123
3  0.878076  0.072995  0.645436


In [5]:
# Renaming columns: assign one new label per existing column, in column order.

df.columns = ["col{}".format(number) for number in (1, 2, 3)]
df

Unnamed: 0,col1,col2,col3
0,0.619833,0.061046,0.786499
1,0.650228,0.323581,0.631667
2,0.980229,0.909017,0.110123
3,0.878076,0.072995,0.645436


In [6]:
# Creating a DataFrame from a dictionary: every key becomes a column name and
# every list supplies that column's values (one entry per row).
# The mapping is intentionally NOT called `dict`: that name would shadow the
# builtin `dict` type for every cell executed afterwards in the same kernel.
spidy_data = {
    "Name": ["Peter Parker", "Gwen Stacy", "Miles Morales", "Miguel O'Hara"],
    "Universe": ["Earth 616", 'Earth 65', 'Earth 1610', 'Earth 2099'],
    "spidyName": ['spiderman', 'spiderwoman', 'Anomaly', 'vampire-spider'],
    "Weight": [75, 66, 64, 115],
}

spidy = pd.DataFrame(spidy_data)
print(spidy, "\n")

# Adding a column: assign a list holding one value per row
# (np.nan marks a missing value for the first row).
spidy['power'] = [np.nan, 'Cool', 'Invisible', 'claws']
print(spidy)

# Dropping a column: drop() returns a new frame rather than modifying in place,
# so the result is bound back to `spidy`.
# `columns=[...]` is the explicit spelling of `drop([...], axis=1)`
# (axis=1 -> columns, axis=0 -> rows).
spidy = spidy.drop(columns=['power'])
spidy


            Name    Universe       spidyName  Weight
0   Peter Parker   Earth 616       spiderman      75
1     Gwen Stacy    Earth 65     spiderwoman      66
2  Miles Morales  Earth 1610         Anomaly      64
3  Miguel O'Hara  Earth 2099  vampire-spider     115 

            Name    Universe       spidyName  Weight      power
0   Peter Parker   Earth 616       spiderman      75        NaN
1     Gwen Stacy    Earth 65     spiderwoman      66       Cool
2  Miles Morales  Earth 1610         Anomaly      64  Invisible
3  Miguel O'Hara  Earth 2099  vampire-spider     115      claws


Unnamed: 0,Name,Universe,spidyName,Weight
0,Peter Parker,Earth 616,spiderman,75
1,Gwen Stacy,Earth 65,spiderwoman,66
2,Miles Morales,Earth 1610,Anomaly,64
3,Miguel O'Hara,Earth 2099,vampire-spider,115


In [7]:
# Selecting with .loc (by label) and .iloc (by integer position).
# Both use the same shape of subscript: [row selection, column selection].

# .loc slices include BOTH endpoints: row labels 0 through 2,
# columns 'Name' through 'Universe'.
rows_by_label = spidy.loc[0:2, 'Name':'Universe']
print(rows_by_label, "\n")

# .iloc slices exclude the stop position: rows at positions 1 and 2,
# columns from position 2 to the end.
rows_by_position = spidy.iloc[1:3, 2:]
print(rows_by_position)


            Name    Universe
0   Peter Parker   Earth 616
1     Gwen Stacy    Earth 65
2  Miles Morales  Earth 1610 

     spidyName  Weight
1  spiderwoman      66
2      Anomaly      64


In [8]:
# Appending a row through .loc: assigning to a row label that is not yet in
# the index (here 4) enlarges the frame; values are given in column order.

spidy.loc[4] = [
    'Pavitr',
    'Mubbatan',
    'Spidy-India',
    75,
]
spidy

Unnamed: 0,Name,Universe,spidyName,Weight
0,Peter Parker,Earth 616,spiderman,75
1,Gwen Stacy,Earth 65,spiderwoman,66
2,Miles Morales,Earth 1610,Anomaly,64
3,Miguel O'Hara,Earth 2099,vampire-spider,115
4,Pavitr,Mubbatan,Spidy-India,75


In [9]:
# Detecting missing values.
# isnull() yields a boolean frame of the same shape: True where a cell is missing.
null_mask = spidy.isnull()
print(null_mask)

# Summing the boolean mask column by column counts the missing cells per column.
print(null_mask.sum())


    Name  Universe  spidyName  Weight
0  False     False      False   False
1  False     False      False   False
2  False     False      False   False
3  False     False      False   False
4  False     False      False   False
Name         0
Universe     0
spidyName    0
Weight       0
dtype: int64


In [10]:
# Selecting a single column by name returns it as a Series.
name_column = spidy['Name']
print(name_column)

# The Series' dtype tells how its values are stored.
print(name_column.dtype)


0     Peter Parker
1       Gwen Stacy
2    Miles Morales
3    Miguel O'Hara
4           Pavitr
Name: Name, dtype: object
object


In [11]:
# info(): prints a structural summary of the frame - index range, each column's
# name, non-null count and dtype, plus the memory usage.
spidy.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Name       5 non-null      object
 1   Universe   5 non-null      object
 2   spidyName  5 non-null      object
 3   Weight     5 non-null      int64 
dtypes: int64(1), object(3)
memory usage: 200.0+ bytes


In [12]:
# describe(): summary statistics (count, mean, std, min, quartiles, max).
# By default only numeric columns are summarised - here just 'Weight'.
spidy.describe()

Unnamed: 0,Weight
count,5.0
mean,79.0
std,20.748494
min,64.0
25%,66.0
50%,75.0
75%,75.0
max,115.0


In [13]:
# describe(include='all'): summarise every column, not only the numeric ones.
# Text columns get count/unique/top/freq; statistics that do not apply to a
# column are left as NaN.
spidy.describe(include='all')

Unnamed: 0,Name,Universe,spidyName,Weight
count,5,5,5,5.0
unique,5,5,5,
top,Peter Parker,Earth 616,spiderman,
freq,1,1,1,
mean,,,,79.0
std,,,,20.748494
min,,,,64.0
25%,,,,66.0
50%,,,,75.0
75%,,,,75.0
