# Create, Read, Update, Delete (CRUD) with pandas

In [1]:
import pandas as pd

## 1. Create

### 1.1 Create a DataFrame from a CSV file

In [2]:
# Load the source table. 'latin-1' decodes each byte to the same code point that
# 'unicode_escape' starts from, but does not interpret backslash sequences found
# in the data, so a stray '\' in a field can no longer corrupt or abort the load.
df = pd.read_csv('Top 50.csv', encoding='latin-1')

### 1.2 Create a DataFrame from a Dictionary

In [3]:
# Column name -> list of values; each key becomes one DataFrame column.
my_dict = dict(
    col1=[1, 2, 3],
    col2=[4, 5, 6],
    col3=[7, 8, 9],
)

In [4]:
# Build a DataFrame whose columns are the dictionary's keys.
dict_df = pd.DataFrame(data=my_dict)

## 2. Read

### 2.1 Show top 5 and bottom 5 rows of data

In [5]:
# First five rows (5 is the default row count for head).
df.head(5)

Unnamed: 0,Rank,Account username,Account Name,Followers (millions),Occupation,Country
0,1,@BarackObama,Barack Obama,132.0,44th President of the United States of America,United States
1,2,@justinbieber,Justin Bieber,114.2,Musician,Canada
2,3,@katyperry,Katy Perry,108.8,Musician,United States
3,4,@rihanna,Rihanna,106.7,Musician,Barbados
4,5,@Cristiano,Cristiano Ronaldo,100.6,Football player,Portugal


In [6]:
# Last five rows (5 is the default row count for tail).
df.tail(5)

Unnamed: 0,Rank,Account username,Account Name,Followers (millions),Occupation,Country
45,46,@sportscenter,SportsCenter,40.0,Sports channel,United States
46,47,@KylieJenner,Kylie Jenner,39.9,Television personality,United States
47,48,@drake,Drake,39.5,Rapper,Canada
48,49,@bts_bighit,BTS,39.2,Musicians/K-pop,South Korea
49,50,@Harry_Styles,Harry Styles,37.7,Musician,United Kingdom


### 2.2 Show Columns and Data Type

In [7]:
# List the column labels of the loaded table.
df.columns

Index(['Rank', 'Account username', 'Account Name', 'Followers (millions)',
       'Occupation', 'Country'],
      dtype='object')

In [8]:
# Show the dtype pandas assigned to each column.
df.dtypes

Rank                      int64
Account username         object
Account Name             object
Followers (millions)    float64
Occupation               object
Country                  object
dtype: object

### 2.3 Summary Statistics

In [9]:
# Summary statistics; include=None (the default) restricts the result to numeric columns.
df.describe(include=None)

Unnamed: 0,Rank,Followers (millions)
count,50.0,50.0
mean,25.5,61.348
std,14.57738,23.214837
min,1.0,37.7
25%,13.25,43.425
50%,25.5,52.9
75%,37.75,74.525
max,50.0,132.0


In [10]:
# Summary of the text (object-dtype) columns: count, unique, top, freq.
df.describe(include=['object'])

Unnamed: 0,Account username,Account Name,Occupation,Country
count,50,50,50,50
unique,50,49,25,12
top,@BarackObama,BTS,Musician,United States
freq,1,2,9,29


### 2.4 Filtering Columns

In [11]:
# Select one column as a Series; bracket access also works for labels that are
# not valid Python identifiers.
df['Occupation']

0     44th President of the United States of America
1                                           Musician
2                                           Musician
3                                           Musician
4                                    Football player
5                      Engineer and Business magnate
6                                           Musician
7     45th President of the United States of America
8                               Musician and actress
9                               Musician and actress
10                           Prime Minister of India
11                      Comedian and television host
12                     Online video sharing platform
13                            Television personality
14                              Musician and actress
15                                Musician and actor
16                                      News channel
17                             Social media platform
18                    Businessman and philanth

In [12]:
# Select several columns at once: all rows, two named columns.
df.loc[:, ['Account Name', 'Occupation']]

Unnamed: 0,Account Name,Occupation
0,Barack Obama,44th President of the United States of America
1,Justin Bieber,Musician
2,Katy Perry,Musician
3,Rihanna,Musician
4,Cristiano Ronaldo,Football player
5,Elon Musk,Engineer and Business magnate
6,Taylor Swift,Musician
7,Donald Trump,45th President of the United States of America
8,Ariana Grande,Musician and actress
9,Lady Gaga,Musician and actress


### 2.5 Filtering on Rows

In [13]:
# Keep only the rows whose username equals '@NASA' (boolean-mask filtering).
df.loc[df['Account username'].eq('@NASA')]

Unnamed: 0,Rank,Account username,Account Name,Followers (millions),Occupation,Country
20,21,@NASA,NASA,57.3,American space agency,United States


### 2.6 Indexing rows with iloc

In [14]:
# Row at position 1 (the second row), all columns, returned as a Series.
df.iloc[1, :]

Rank                                2
Account username        @justinbieber
Account Name            Justin Bieber
Followers (millions)            114.2
Occupation                   Musician
Country                        Canada
Name: 1, dtype: object

In [15]:
# Single cell by position: row 1, column 2 ('Account Name').
df.iat[1, 2]

'Justin Bieber'

In [16]:
# Positional slice: rows 0 through 3 (the end position is exclusive).
df.iloc[:4]

Unnamed: 0,Rank,Account username,Account Name,Followers (millions),Occupation,Country
0,1,@BarackObama,Barack Obama,132.0,44th President of the United States of America,United States
1,2,@justinbieber,Justin Bieber,114.2,Musician,Canada
2,3,@katyperry,Katy Perry,108.8,Musician,United States
3,4,@rihanna,Rihanna,106.7,Musician,Barbados


### 2.7 Indexing with loc

In [17]:
# Build a Rank-indexed frame for the .loc examples. set_index returns a new
# DataFrame, so df keeps its default integer index and no explicit copy or
# inplace mutation is needed.
indexed_df = df.set_index('Rank')

In [18]:
# Preview the re-indexed frame: 'Rank' is now the row label rather than a column.
indexed_df.head(5)

Unnamed: 0_level_0,Account username,Account Name,Followers (millions),Occupation,Country
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,@BarackObama,Barack Obama,132.0,44th President of the United States of America,United States
2,@justinbieber,Justin Bieber,114.2,Musician,Canada
3,@katyperry,Katy Perry,108.8,Musician,United States
4,@rihanna,Rihanna,106.7,Musician,Barbados
5,@Cristiano,Cristiano Ronaldo,100.6,Football player,Portugal


In [19]:
# Label-based lookup: the row whose Rank label is 5 (not the row at position 5).
indexed_df.loc[5, :]

Account username               @Cristiano
Account Name            Cristiano Ronaldo
Followers (millions)                100.6
Occupation                Football player
Country                          Portugal
Name: 5, dtype: object

## 3. Update

### 3.1 Dropping Rows

In [20]:
# Drop the row labelled 12. The result is only displayed, not assigned, so df
# itself still contains that row.
df.drop(index=12)

Unnamed: 0,Rank,Account username,Account Name,Followers (millions),Occupation,Country
0,1,@BarackObama,Barack Obama,132.0,44th President of the United States of America,United States
1,2,@justinbieber,Justin Bieber,114.2,Musician,Canada
2,3,@katyperry,Katy Perry,108.8,Musician,United States
3,4,@rihanna,Rihanna,106.7,Musician,Barbados
4,5,@Cristiano,Cristiano Ronaldo,100.6,Football player,Portugal
5,6,@elonmusk,Elon Musk,96.2,Engineer and Business magnate,United States
6,7,@taylorswift13,Taylor Swift,90.3,Musician,United States
7,8,@realDonaldTrump[3],Donald Trump,88.8,45th President of the United States of America,United States
8,9,@ArianaGrande[4],Ariana Grande,85.3,Musician and actress,United States
9,10,@ladygaga,Lady Gaga,84.7,Musician and actress,United States


### 3.2 Dropping Columns

In [21]:
# Drop one column by name. The result is only displayed, not assigned, so df
# itself still contains the column.
df.drop(columns='Account username')

Unnamed: 0,Rank,Account Name,Followers (millions),Occupation,Country
0,1,Barack Obama,132.0,44th President of the United States of America,United States
1,2,Justin Bieber,114.2,Musician,Canada
2,3,Katy Perry,108.8,Musician,United States
3,4,Rihanna,106.7,Musician,Barbados
4,5,Cristiano Ronaldo,100.6,Football player,Portugal
5,6,Elon Musk,96.2,Engineer and Business magnate,United States
6,7,Taylor Swift,90.3,Musician,United States
7,8,Donald Trump,88.8,45th President of the United States of America,United States
8,9,Ariana Grande,85.3,Musician and actress,United States
9,10,Lady Gaga,84.7,Musician and actress,United States


### 3.3 Creating Columns

In [22]:
# Derive an absolute follower count from the value expressed in millions.
df['n followers'] = df['Followers (millions)'].mul(1_000_000)

In [23]:
# Confirm the new 'n followers' column was appended.
df.head(n=5)

Unnamed: 0,Rank,Account username,Account Name,Followers (millions),Occupation,Country,n followers
0,1,@BarackObama,Barack Obama,132.0,44th President of the United States of America,United States,132000000.0
1,2,@justinbieber,Justin Bieber,114.2,Musician,Canada,114200000.0
2,3,@katyperry,Katy Perry,108.8,Musician,United States,108800000.0
3,4,@rihanna,Rihanna,106.7,Musician,Barbados,106700000.0
4,5,@Cristiano,Cristiano Ronaldo,100.6,Football player,Portugal,100600000.0


### 3.4 Updating Columns

In [24]:
# Overwrite the whole 'n followers' column. The right-hand side is a plain string
# literal, not an expression, so every row receives this same text.
# NOTE(review): if a recomputed numeric value was intended, the right-hand side
# should be the arithmetic on df['Followers (millions)'] instead - confirm intent.
df['n followers'] = 'Followers (millions) * 1000000'

In [25]:
# Inspect the overwritten 'n followers' column.
df.head(n=5)

Unnamed: 0,Rank,Account username,Account Name,Followers (millions),Occupation,Country,n followers
0,1,@BarackObama,Barack Obama,132.0,44th President of the United States of America,United States,Followers (millions) * 1000000
1,2,@justinbieber,Justin Bieber,114.2,Musician,Canada,Followers (millions) * 1000000
2,3,@katyperry,Katy Perry,108.8,Musician,United States,Followers (millions) * 1000000
3,4,@rihanna,Rihanna,106.7,Musician,Barbados,Followers (millions) * 1000000
4,5,@Cristiano,Cristiano Ronaldo,100.6,Football player,Portugal,Followers (millions) * 1000000


### 3.5 Updating a Single Value

In [26]:
# Replace one cell with a text label. 'Followers (millions)' is float64, and
# newer pandas releases deprecate silently upcasting a column when a value of an
# incompatible type is assigned, so convert the column to object dtype explicitly.
df['Followers (millions)'] = df['Followers (millions)'].astype(object)
# Row 0, column position 3 ('Followers (millions)').
df.iloc[0, 3] = 'THE MOST'

In [27]:
# Verify that only the first row's follower value changed.
df.head(n=5)

Unnamed: 0,Rank,Account username,Account Name,Followers (millions),Occupation,Country,n followers
0,1,@BarackObama,Barack Obama,THE MOST,44th President of the United States of America,United States,Followers (millions) * 1000000
1,2,@justinbieber,Justin Bieber,114.2,Musician,Canada,Followers (millions) * 1000000
2,3,@katyperry,Katy Perry,108.8,Musician,United States,Followers (millions) * 1000000
3,4,@rihanna,Rihanna,106.7,Musician,Barbados,Followers (millions) * 1000000
4,5,@Cristiano,Cristiano Ronaldo,100.6,Football player,Portugal,Followers (millions) * 1000000


### 3.6 Condition-based Updating using Apply

In [33]:
# Inspect the raw Country string of the first row (column position 5) to reveal
# any hidden leading characters.
df.iat[0, 5]

'\xa0United States'

In [34]:
# Flag US accounts with 1, everything else with 0. The raw Country values carry a
# leading non-breaking space, so strip surrounding whitespace (str.strip removes
# '\xa0' as well) before comparing instead of hard-coding that stray character.
# str() keeps a missing value from raising; it simply maps to 0.
df['American'] = df['Country'].apply(
    lambda country: 1 if str(country).strip() == 'United States' else 0
)

In [35]:
# Show the first few rows flagged as American (the flag is stored as 1/0).
df.loc[df['American'] == 1].head()

Unnamed: 0,Rank,Account username,Account Name,Followers (millions),Occupation,Country,n followers,American
0,1,@BarackObama,Barack Obama,THE MOST,44th President of the United States of America,United States,Followers (millions) * 1000000,1
2,3,@katyperry,Katy Perry,108.8,Musician,United States,Followers (millions) * 1000000,1
5,6,@elonmusk,Elon Musk,96.2,Engineer and Business magnate,United States,Followers (millions) * 1000000,1
6,7,@taylorswift13,Taylor Swift,90.3,Musician,United States,Followers (millions) * 1000000,1
7,8,@realDonaldTrump[3],Donald Trump,88.8,45th President of the United States of America,United States,Followers (millions) * 1000000,1


## 4. Delete/Output

### 4.1 Output to CSV

In [36]:
# Write the frame to disk. index=True is the default: the row labels are written
# as the first, unnamed column of the file.
df.to_csv('output.csv', index=True)

### 4.2 Output to JSON

In [37]:
# Serialise to a JSON string; 'columns' is the default DataFrame orientation
# ({column -> {index -> value}}).
df.to_json(orient='columns')

'{"Rank":{"0":1,"1":2,"2":3,"3":4,"4":5,"5":6,"6":7,"7":8,"8":9,"9":10,"10":11,"11":12,"12":13,"13":14,"14":15,"15":16,"16":17,"17":18,"18":19,"19":20,"20":21,"21":22,"22":23,"23":24,"24":25,"25":26,"26":27,"27":28,"28":29,"29":30,"30":31,"31":32,"32":33,"33":34,"34":35,"35":36,"36":37,"37":38,"38":39,"39":40,"40":41,"41":42,"42":43,"43":44,"44":45,"45":46,"46":47,"47":48,"48":49,"49":50},"Account username":{"0":"@BarackObama","1":"@justinbieber","2":"@katyperry","3":"@rihanna","4":"@Cristiano","5":"@elonmusk","6":"@taylorswift13","7":"@realDonaldTrump[3]","8":"@ArianaGrande[4]","9":"@ladygaga","10":"@narendramodi","11":"@TheEllenShow","12":"@YouTube","13":"@KimKardashian","14":"@selenagomez","15":"@jtimberlake","16":"@cnnbrk","17":"@Twitter[a]","18":"@BillGates","19":"@CNN","20":"@NASA","21":"@neymarjr","22":"@britneyspears","23":"@ddlovato","24":"@nytimes","25":"@shakira","26":"@jimmyfallon","27":"@kingjames","28":"@bbcbreaking","29":"@pmoindia","30":"@imvkohli","31":"@srbachchan","3

### 4.3 Output to HTML

In [38]:
# Render the frame as an HTML <table>; with no buffer given the markup is
# returned as a string.
df.to_html(buf=None)

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Rank</th>\n      <th>Account username</th>\n      <th>Account Name</th>\n      <th>Followers (millions)</th>\n      <th>Occupation</th>\n      <th>Country</th>\n      <th>n followers</th>\n      <th>American</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>@BarackObama</td>\n      <td>Barack Obama</td>\n      <td>THE MOST</td>\n      <td>44th President of the United States of America</td>\n      <td>United States</td>\n      <td>Followers (millions) * 1000000</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2</td>\n      <td>@justinbieber</td>\n      <td>Justin Bieber</td>\n      <td>114.2</td>\n      <td>Musician</td>\n      <td>Canada</td>\n      <td>Followers (millions) * 1000000</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>3</td>\n      <td>@katyperry</td>\n      <td>Katy 

### 4.4 Delete a DataFrame

In [None]:
# Unbind the name 'df' from the namespace; any later reference to df raises
# NameError until it is recreated.
del df