# DataFrames

In [1]:
import pandas as pd
import numpy as np

In [2]:
dict1 = {"name" : ['A','B','C'] , 
         "marks" : [10,20,30] , 
         "city" : ['blr' , 'kol', 'del']
        }

In [3]:
df = pd.DataFrame(dict1)

In [4]:
df

Unnamed: 0,name,marks,city
0,A,10,blr
1,B,20,kol
2,C,30,del


In [5]:
df.to_csv('marks.csv')

In [6]:
df.to_csv('markswithoutindex.csv', index=False)

In [7]:
df.head(2)

Unnamed: 0,name,marks,city
0,A,10,blr
1,B,20,kol


In [8]:
df.tail(2)

Unnamed: 0,name,marks,city
1,B,20,kol
2,C,30,del


In [9]:
df.describe()

Unnamed: 0,marks
count,3.0
mean,20.0
std,10.0
min,10.0
25%,15.0
50%,20.0
75%,25.0
max,30.0


In [10]:
sw = pd.read_csv('markswithoutindex.csv')

In [11]:
sw

Unnamed: 0,name,marks,city
0,A,10,blr
1,B,20,kol
2,C,30,del


In [12]:
sw['marks'][0]

10

In [13]:
sw['marks'][0] = 5

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sw['marks'][0] = 5


In [14]:
sw

Unnamed: 0,name,marks,city
0,A,5,blr
1,B,20,kol
2,C,30,del


In [15]:
sw.index = ['first', 'second', 'third']

In [16]:
sw

Unnamed: 0,name,marks,city
first,A,5,blr
second,B,20,kol
third,C,30,del


# Series

In [17]:
ser = pd.Series(np.random.rand(4))

In [18]:
ser

0    0.070922
1    0.139839
2    0.082806
3    0.810796
dtype: float64

In [19]:
type(ser)

pandas.core.series.Series

In [20]:
newdf = pd.DataFrame(np.random.rand(3,2), index=np.arange(3))

In [21]:
type(newdf)

pandas.core.frame.DataFrame

In [22]:
newdf

Unnamed: 0,0,1
0,0.208482,0.732475
1,0.435801,0.551486
2,0.330077,0.294804


In [23]:
newdf.dtypes

0    float64
1    float64
dtype: object

In [24]:
newdf[0][0]

0.20848242907372316

In [25]:
newdf.index

Int64Index([0, 1, 2], dtype='int64')

In [26]:
newdf.columns

RangeIndex(start=0, stop=2, step=1)

In [27]:
newdf.to_numpy()

array([[0.20848243, 0.73247488],
       [0.43580089, 0.55148576],
       [0.33007712, 0.29480388]])

In [28]:
#transpose matrix from 3*2 to 2*3
newdf.T

Unnamed: 0,0,1,2
0,0.208482,0.435801,0.330077
1,0.732475,0.551486,0.294804


In [29]:
newdf.sort_index(axis=0, ascending=False) #axis is for row sorting

Unnamed: 0,0,1
2,0.330077,0.294804
1,0.435801,0.551486
0,0.208482,0.732475


In [30]:
type(newdf[0])

pandas.core.series.Series

In [31]:
newdf2 = newdf #just points not copy

In [32]:
newdf2[0] = 0.01

In [33]:
newdf

Unnamed: 0,0,1
0,0.01,0.732475
1,0.01,0.551486
2,0.01,0.294804


In [34]:
newdf.loc[0,0] = 2

In [35]:
newdf

Unnamed: 0,0,1
0,2.0,0.732475
1,0.01,0.551486
2,0.01,0.294804


In [36]:
newdf.drop(0, axis=1)

Unnamed: 0,1
0,0.732475
1,0.551486
2,0.294804


In [37]:
 newdf


Unnamed: 0,0,1
0,2.0,0.732475
1,0.01,0.551486
2,0.01,0.294804


In [38]:
newdf.loc[[1,2], :]

Unnamed: 0,0,1
1,0.01,0.551486
2,0.01,0.294804


In [39]:
newdf.loc[(newdf[0]<1) & (newdf[1]>0.5)]

Unnamed: 0,0,1
1,0.01,0.551486


In [40]:
newdf

Unnamed: 0,0,1
0,2.0,0.732475
1,0.01,0.551486
2,0.01,0.294804


In [41]:
newdf.iloc[0,1]

0.7324748774844233

In [42]:
newdf.iloc[[0,1], [1,1]]

Unnamed: 0,1,1.1
0,0.732475,0.732475
1,0.551486,0.551486


In [43]:
newdf.drop([1])

Unnamed: 0,0,1
0,2.0,0.732475
2,0.01,0.294804


In [44]:
#to drop permanently
#newdf = newdf.drop([1])   OR
#newdf.drop([1], inplace=True)

In [45]:
newdf

Unnamed: 0,0,1
0,2.0,0.732475
1,0.01,0.551486
2,0.01,0.294804


In [46]:
newdf.loc[:,[0]] = 0.5

In [47]:
newdf

Unnamed: 0,0,1
0,0.5,0.732475
1,0.5,0.551486
2,0.5,0.294804


In [48]:
newdf.drop_duplicates(subset=[0])

Unnamed: 0,0,1
0,0.5,0.732475


In [49]:
newdf.shape

(3, 2)

In [50]:
newdf[0].value_counts()

0.5    3
Name: 0, dtype: int64

In [51]:
newdf.isnull()

Unnamed: 0,0,1
0,False,False
1,False,False
2,False,False


In [52]:
newdf.max()

0    0.500000
1    0.732475
dtype: float64

In [53]:
newdf.mean()

0    0.500000
1    0.526255
dtype: float64

In [54]:
newdf.std()

0    0.000000
1    0.219924
dtype: float64