# pandas

pandas is an open source ,BSD-licensed library providing high-performance, easy-to-use data struture and data analysis tools for the python programming language.

In [2]:
import pandas as pd
import numpy as np

In [9]:
df=pd.DataFrame(np.arange(0,20).reshape(5,4),index=['row1','row2','row3','row4','row5'],columns=["column1","column2","column3","column4"])
print(df)

      column1  column2  column3  column4
row1        0        1        2        3
row2        4        5        6        7
row3        8        9       10       11
row4       12       13       14       15
row5       16       17       18       19


In [9]:
df.head()

Unnamed: 0,column1,column2,column3,column4
row1,0,1,2,3
row2,4,5,6,7
row3,8,9,10,11
row4,12,13,14,15
row5,16,17,18,19


In [10]:
df.to_csv('Test1.csv')

In [11]:
df.loc['row1']

column1    0
column2    1
column3    2
column4    3
Name: row1, dtype: int32

In [8]:
df.iloc[0:3,1:3]

Unnamed: 0,column2,column3
row1,1,2
row2,5,6
row3,9,10


In [9]:
df.iloc[:,1:].values.shape

(5, 3)

In [15]:
df.iloc[:,1:].values

array([[ 1,  2,  3],
       [ 5,  6,  7],
       [ 9, 10, 11],
       [13, 14, 15],
       [17, 18, 19]])

In [16]:
df.isnull().sum

<bound method NDFrame._add_numeric_operations.<locals>.sum of       column1  column2  column3  column4
row1    False    False    False    False
row2    False    False    False    False
row3    False    False    False    False
row4    False    False    False    False
row5    False    False    False    False>

In [18]:
df['column1'].value_counts()

0     1
4     1
8     1
12    1
16    1
Name: column1, dtype: int64

In [19]:
df['column1'].unique()

array([ 0,  4,  8, 12, 16])

In [20]:
df[['column1','column2','column3']]

Unnamed: 0,column1,column2,column3
row1,0,1,2
row2,4,5,6
row3,8,9,10
row4,12,13,14
row5,16,17,18


In [22]:
df=pd.read_csv('Test1.csv')

In [23]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  5 non-null      object 
 1   column1     0 non-null      float64
 2   column2     0 non-null      float64
 3   column3     0 non-null      float64
 4   column4     0 non-null      float64
dtypes: float64(4), object(1)
memory usage: 328.0+ bytes


In [24]:
df.describe()

Unnamed: 0,column1,column2,column3,column4
count,0.0,0.0,0.0,0.0
mean,,,,
std,,,,
min,,,,
25%,,,,
50%,,,,
75%,,,,
max,,,,


In [8]:
test_df=pd.read_csv('test1.csv',sep=';')
print(test_df)

               ,column1,column2,column3,column4
row1 0  1  2                                  3
row2 4  5  6                                  7
row3 8  9  10                                11
row4 12 13 14                                15
row5 16 17 18                                19


In [26]:
test_df.head()

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,",column1,column2,column3,column4"
row1,0,1,2,3
row2,4,5,6,7
row3,8,9,10,11
row4,12,13,14,15
row5,16,17,18,19


# csv

In [30]:
from io import StringIO

In [7]:
data=('a,b,c\n'
     '4,apple,bat,\n'
     '8,orange,cow,')

In [8]:
pd.read_csv(StringIO(data))

Unnamed: 0,a,b,c
4,apple,bat,
8,orange,cow,


In [9]:
pd.read_csv(StringIO(data),index_col=False)

Unnamed: 0,a,b,c
0,4,apple,bat
1,8,orange,cow


In [10]:
data=('a,b,c\n'
     '4,apple,bat,\n'
     '8,orange,cow,')

In [11]:
pd.read_csv(StringIO(data), usecols=['b','c'],index_col=False)

Unnamed: 0,b,c
0,apple,bat
1,orange,cow


In [38]:
#quoting ans escape charecters. very useful in NLP

data='a,b\n"hello, \\Bob\\",nice to meet you",5'

In [40]:
pd.read_csv(StringIO(data),escapechar='\\')

Unnamed: 0,a,b
0,"hello, Bob"",nice to meet you",5


In [12]:
df=pd.read_csv('https://download.bls.gov/pub/time.series/cu/cu.item',sep='\t')

In [13]:
df.head()

Unnamed: 0,item_code,item_name,display_level,selectable,sort_sequence
0,AA0,All items - old base,0,T,2
1,AA0R,Purchasing power of the consumer dollar - old ...,0,T,400
2,SA0,All items,0,T,1
3,SA0E,Energy,1,T,375
4,SA0L1,All items less food,1,T,359


# Read Json to CSV

In [41]:
Data = '{"emplyee_name": "james","email":"james@gmail.com","job_profile":[{"title1":"team lead","title2":"sr.develpoer"}]}'
pd.read_json(Data)

Unnamed: 0,emplyee_name,email,job_profile
0,james,james@gmail.com,"{'title1': 'team lead', 'title2': 'sr.develpoer'}"


In [43]:
df.to_json(orient="index")

'{"0":{"Unnamed: 0":"row1;0;1;2;3","column1":null,"column2":null,"column3":null,"column4":null},"1":{"Unnamed: 0":"row2;4;5;6;7","column1":null,"column2":null,"column3":null,"column4":null},"2":{"Unnamed: 0":"row3;8;9;10;11","column1":null,"column2":null,"column3":null,"column4":null},"3":{"Unnamed: 0":"row4;12;13;14;15","column1":null,"column2":null,"column3":null,"column4":null},"4":{"Unnamed: 0":"row5;16;17;18;19","column1":null,"column2":null,"column3":null,"column4":null}}'

In [44]:
df.to_json(orient="records")

'[{"Unnamed: 0":"row1;0;1;2;3","column1":null,"column2":null,"column3":null,"column4":null},{"Unnamed: 0":"row2;4;5;6;7","column1":null,"column2":null,"column3":null,"column4":null},{"Unnamed: 0":"row3;8;9;10;11","column1":null,"column2":null,"column3":null,"column4":null},{"Unnamed: 0":"row4;12;13;14;15","column1":null,"column2":null,"column3":null,"column4":null},{"Unnamed: 0":"row5;16;17;18;19","column1":null,"column2":null,"column3":null,"column4":null}]'

In [47]:
url = 'https://www.fdic.gov/bank/individual/failed/banklist.html'

dfs = pd.read_html(url)



ImportError: html5lib not found, please install it