In [1]:
#how to create a data frame in pandas

In [1]:
import pandas as pd
import numpy as np

In [2]:
#specify column names and values using a dictionary
df = pd.DataFrame({'Resistivity-Ohms': [100,450,230,400],
                   'VES':['Wenner','Schlumberger','Dipole-Dipole', 'Wenner']})

In [3]:
df

Unnamed: 0,Resistivity-Ohms,VES
0,100,Wenner
1,450,Schlumberger
2,230,Dipole-Dipole
3,400,Wenner


In [18]:
#create data frame from random values
import pandas as pd
import numpy as np
df_rand = pd.DataFrame(np.random.randn(2,3), columns = ['A','B','C'])

In [19]:
df_rand

Unnamed: 0,A,B,C
0,1.331637,0.266748,1.379711
1,0.433043,1.337425,-1.713523


In [20]:
#Create data frame from mixed data types
import pandas.util.testing
pd.util.testing.makeTimeDataFrame().head()
#.makeMixedDataFrame and .makeDataFrame


Unnamed: 0,A,B,C,D
2000-01-03,-0.802057,0.092044,0.667642,1.424638
2000-01-04,-0.539003,-0.410572,-0.714754,-0.599529
2000-01-05,1.662361,-0.127493,0.085996,-1.737681
2000-01-06,0.752296,-0.28016,-0.658885,-0.406089
2000-01-07,-0.525944,0.547063,-2.381371,-0.957086


Need to create a time series dataset for testing? 
Use pd.util.testing.makeTimeDataFrame().

Need more control over the columns & data? 
Generate data with np.random & overwrite index with makeDateIndex().

In [24]:
num_rows = 1*24 #Number of hours in a day
sales = pd.util.testing.makeTimeDataFrame(num_rows, freq='H')
sales.head()

Unnamed: 0,A,B,C,D
2000-01-01 00:00:00,0.800189,0.749149,-0.114589,0.438451
2000-01-01 01:00:00,-1.77445,-1.377975,1.356092,-0.661794
2000-01-01 02:00:00,-1.815057,-0.897161,-0.892818,0.398855
2000-01-01 03:00:00,-0.69846,-0.776197,-0.477888,-0.823541
2000-01-01 04:00:00,-0.043305,0.710354,1.03329,0.932376


In [25]:
num_cols = 2 #Specify the number of columns
cols = ['Price of Items($)', 'Number of Items Sold']
df_sales = pd.DataFrame(np.random.randint(1, 200, 
                        size = (num_rows, num_cols)),
                        columns=cols)
df_sales.index = pd.util.testing.makeDateIndex(num_rows, freq = 'H')
df_sales.head()

Unnamed: 0,Price of Items($),Number of Items Sold
2000-01-01 00:00:00,122,82
2000-01-01 01:00:00,23,105
2000-01-01 02:00:00,19,164
2000-01-01 03:00:00,76,40
2000-01-01 04:00:00,96,104


In [33]:
import pandas as pd
df = pd.read_csv('../Data/table1.csv')
df

Unnamed: 0,country,year,cases,population
0,Afghanistan,1999,745,19987071
1,Afghanistan,2000,2666,20595360
2,Brazil,1999,37737,172006362
3,Brazil,2000,80488,174504898
4,China,1999,212258,1272915272
5,China,2000,213766,1280428583


In [34]:
df.rename({0:'Country'}) #Rename Index

Unnamed: 0,country,year,cases,population
Country,Afghanistan,1999,745,19987071
1,Afghanistan,2000,2666,20595360
2,Brazil,1999,37737,172006362
3,Brazil,2000,80488,174504898
4,China,1999,212258,1272915272
5,China,2000,213766,1280428583


In [36]:
df

Unnamed: 0,country,year,cases,population
0,Afghanistan,1999,745,19987071
1,Afghanistan,2000,2666,20595360
2,Brazil,1999,37737,172006362
3,Brazil,2000,80488,174504898
4,China,1999,212258,1272915272
5,China,2000,213766,1280428583


In [37]:
#Want to create new columns within a data frame? use df.assign()
#let's create new columns for population_decline and percent_cases
p_decline = df.assign(population_decline = df.population/df.cases, 
                          percent_cases= df.cases/df_new.population*100)
p_decline           

Unnamed: 0,country,year,cases,population,population_decline,percent_cases
0,Afghanistan,1999,745,19987071,26828.283221,0.003727
1,Afghanistan,2000,2666,20595360,7725.191298,0.012945
2,Brazil,1999,37737,172006362,4558.029573,0.021939
3,Brazil,2000,80488,174504898,2168.085901,0.046124
4,China,1999,212258,1272915272,5997.019062,0.016675
5,China,2000,213766,1280428583,5989.860796,0.016695


In [155]:
import seaborn as sns
from IPython.display import HTML
#set color map to sea born light green palette
cm = sns.light_palette('green', as_cmap = True)
(p_decline.style
        .set_caption('Population Decline from 1999 to 2000')
        .background_gradient(cmap=cm, subset = ['population_decline', 'percent_cases'])
        .format({'percent_cases':'{:,.2%}'})
)

Unnamed: 0,country,year,cases,population,population_decline,percent_cases
0,Afghanistan,1999,745,19987071,26828.283221,0.37%
1,Afghanistan,2000,2666,20595360,7725.191298,1.29%
2,Brazil,1999,37737,172006362,4558.029573,2.19%
3,Brazil,2000,80488,174504898,2168.085901,4.61%
4,China,1999,212258,1272915272,5997.019062,1.67%
5,China,2000,213766,1280428583,5989.860796,1.67%


In [178]:
#lets say we want all cases in case column below 3000 colored red, less than 10,000 coloured green and greater than 10,000 coloured black

def color_red(value):
    
    if value < 3000:
        color = 'red'
    elif value > 3000:
        color = 'green'
    else:
        color = 'black'
    
    return 'color:%s' % color
        
#You can then apply this function to your dataframe using the Styler objectâ€™s applymap() method:

p_decline.style.applymap(color_red, subset = ['cases'])
    

Unnamed: 0,country,year,cases,population,population_decline,percent_cases
0,Afghanistan,1999,745,19987071,26828.283221,0.003727
1,Afghanistan,2000,2666,20595360,7725.191298,0.012945
2,Brazil,1999,37737,172006362,4558.029573,0.021939
3,Brazil,2000,80488,174504898,2168.085901,0.046124
4,China,1999,212258,1272915272,5997.019062,0.016675
5,China,2000,213766,1280428583,5989.860796,0.016695


In [180]:
#Need to create a bunch of new columns based on existing columns? Use this pattern:

#for col in df.columns:
   # df[f'{col}_new'] = df[col].apply(my_function)
    
cities = pd.DataFrame({'State':['NY', 'az', 'cA', 'Ok'], 'counTRY': ['Us', 'us','us','uS']})
cities

Unnamed: 0,State,counTRY
0,NY,Us
1,az,us
2,cA,us
3,Ok,uS


In [201]:
for col in cities.columns:
    cities[f'{col}'] = cities[col].str.upper()
    

In [202]:
cities

Unnamed: 0,State_fixed,counTRY_fixed,State_fixed_fixed,counTRY_fixed_fixed,State_fixed_fixed_fixed,counTRY_fixed_fixed_fixed,State_fixed_fixed_fixed_fixed,counTRY_fixed_fixed_fixed_fixed
0,NY,US,NY,US,NY,US,NY,US
1,AZ,US,AZ,US,AZ,US,AZ,US
2,CA,US,CA,US,CA,US,CA,US
3,OK,US,OK,US,OK,US,OK,US


In [203]:
cities.pop('counTRY_fixed_fixed_fixed_fixed')

0    US
1    US
2    US
3    US
Name: counTRY_fixed_fixed_fixed_fixed, dtype: object

In [204]:
cities

Unnamed: 0,State_fixed,counTRY_fixed,State_fixed_fixed,counTRY_fixed_fixed,State_fixed_fixed_fixed,counTRY_fixed_fixed_fixed,State_fixed_fixed_fixed_fixed
0,NY,US,NY,US,NY,US,NY
1,AZ,US,AZ,US,AZ,US,AZ
2,CA,US,CA,US,CA,US,CA
3,OK,US,OK,US,OK,US,OK


In [48]:
import pandas as pd
import numpy as np
groceries = pd.DataFrame(np.array([[10,20,30], 
                                   [20,50,70], 
                                   [40,60,90]]), 
                   columns= ['Hostel_A', 'Hostel_B', 'Hostel_C'], 
                   index= ['Sugar', 'Milk', 'Chocolate']) 

In [49]:
groceries

Unnamed: 0,Hostel_A,Hostel_B,Hostel_C
Sugar,10,20,30
Milk,20,50,70
Chocolate,40,60,90


In [50]:
df2

Unnamed: 0,A,B,C
Sugar,1,2,3
Milk,2,5,7
Chocolate,4,6,7


In [55]:
#3 ways to rename columns:

#1. Most flexible option:
#df = df.rename({'A':'a', 'B':'b'}, axis='columns')

#2. Overwrite all column names:
#df.columns = ['a', 'b']

#3. Apply string method:
#df.columns = df.columns.str.lower()

In [57]:
df2

Unnamed: 0,A,B,C
Sugar,1,2,3
Milk,2,5,7
Chocolate,4,6,7


In [58]:
#flexible option to rename columns and index
df2_rename = df2.rename(columns = {'A':'price($)', 
                                   'B': 'customers', 
                                   'C': 'Quantity'}, 
                        index = {'Milo': 'Ovaltine'}) 
df2_rename

Unnamed: 0,price($),customers,Quantity
Sugar,1,2,3
Milk,2,5,7
Chocolate,4,6,7


In [52]:
groceries

Unnamed: 0,Hostel_A,Hostel_B,Hostel_C
Sugar,10,20,30
Milk,20,50,70
Chocolate,40,60,90


In [54]:
groceries.rename(columns = str.upper)

Unnamed: 0,HOSTEL_A,HOSTEL_B,HOSTEL_C
Sugar,10,20,30
Milk,20,50,70
Chocolate,40,60,90


In [295]:
#we can also use the Lambda function

df2_rename.rename(index = lambda s: s + '750g')

Unnamed: 0,price($),customers,Quantity
Ovaltine750g,1,2,3
Milk750g,2,5,7
Chocolate750g,4,6,7


In [59]:
df2_rename

Unnamed: 0,price($),customers,Quantity
Sugar,1,2,3
Milk,2,5,7
Chocolate,4,6,7


In [60]:
#add prefix to data frame in all columns
df2_rename.add_prefix('item_')

Unnamed: 0,item_price($),item_customers,item_Quantity
Sugar,1,2,3
Milk,2,5,7
Chocolate,4,6,7


In [61]:
#add suffix to data frame in all columns
df2_rename.add_suffix('_item')

Unnamed: 0,price($)_item,customers_item,Quantity_item
Sugar,1,2,3
Milk,2,5,7
Chocolate,4,6,7


In [300]:
df2_rename

Unnamed: 0,price($),customers,Quantity
Ovaltine,1,2,3
Milk,2,5,7
Chocolate,4,6,7


In [306]:
df3 = df2_rename.customers.reset_index()

In [320]:
df3.iloc[2]

index        Chocolate
customers            6
Name: 2, dtype: object

Unnamed: 0,price($),customers,Quantity
Ovaltine,1,2,3
Milk,2,5,7
Chocolate,4,6,7


In [326]:
df2_rename.shape

(3, 3)

In [1]:
#Want to filter a DataFrame to only include the largest categories?

#1. Save the value_counts() output
#2. Get the index of its head()
#3. Use that index with isin() to filter the DataFrame

import pandas as pd
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_most-followed_Twitter_accounts')
df[0].head()

Unnamed: 0,Rank,Change (monthly),Account name,Owner,Followers (millions),Activity
0,1,,@BarackObama,Barack Obama,121,Former U.S. president
1,2,,@justinbieber,Justin Bieber,112,Musician
2,3,,@katyperry,Katy Perry,108,Musician
3,4,,@rihanna,Rihanna,98,Musician and businesswoman
4,5,,@Cristiano,Cristiano Ronaldo,87,Football player


In [2]:
df_new = pd.concat([df[0], df[1]])
df_new.head()

Unnamed: 0,Rank,Change (monthly),Account name,Owner,Followers (millions),Activity,vteTwitter,vteTwitter.1
0,1.0,,@BarackObama,Barack Obama,121.0,Former U.S. president,,
1,2.0,,@justinbieber,Justin Bieber,112.0,Musician,,
2,3.0,,@katyperry,Katy Perry,108.0,Musician,,
3,4.0,,@rihanna,Rihanna,98.0,Musician and businesswoman,,
4,5.0,,@Cristiano,Cristiano Ronaldo,87.0,Football player,,


In [3]:
df_new['vteTwitter'].fillna('Not Available', inplace=True)
df_new['vteTwitter.1'].fillna('Not Available', inplace=True)

In [4]:
df_new.head()

Unnamed: 0,Rank,Change (monthly),Account name,Owner,Followers (millions),Activity,vteTwitter,vteTwitter.1
0,1.0,,@BarackObama,Barack Obama,121.0,Former U.S. president,Not Available,Not Available
1,2.0,,@justinbieber,Justin Bieber,112.0,Musician,Not Available,Not Available
2,3.0,,@katyperry,Katy Perry,108.0,Musician,Not Available,Not Available
3,4.0,,@rihanna,Rihanna,98.0,Musician and businesswoman,Not Available,Not Available
4,5.0,,@Cristiano,Cristiano Ronaldo,87.0,Football player,Not Available,Not Available


In [8]:
counts = df_new.Activity.value_counts() #save the value count output
counts

Musician                                           12
Musician and actress                                5
News channel                                        3
Actor                                               2
Football player                                     2
Television personality and businesswoman            2
Social media platform                               2
Comedian and television personality                 2
Sports channel                                      2
Actor and film producer                             2
Basketball player                                   1
Former U.S. president                               1
Industrial designer and tech entrepreneur           1
Current Prime Minister of India                     1
Office of the Prime Minister of India               1
Space agency                                        1
Businessman and philanthropist                      1
Newspaper                                           1
Current U.S. president      

In [9]:
largest_categories = counts.head(3).index #get the index of the head()
largest_categories

Index(['Musician', 'Musician and actress', 'News channel'], dtype='object')

In [20]:
#Use that index with isin() to filter the DataFrame
df_filtered = df_new[df_new.Activity.isin(largest_categories)].head()
df_filtered.drop(['vteTwitter', 'Change (monthly)','vteTwitter.1'], axis='columns')

Unnamed: 0,Rank,Account name,Owner,Followers (millions),Activity
1,2.0,@justinbieber,Justin Bieber,112.0,Musician
2,3.0,@katyperry,Katy Perry,108.0,Musician
5,6.0,@taylorswift13,Taylor Swift,86.0,Musician
7,8.0,@ladygaga,Lady Gaga,82.0,Musician
9,10.0,@ArianaGrande,Ariana Grande,76.0,Musician and actress


In [40]:
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [45]:
#Are you trying to filter a DataFrame using lots of criteria? It can be hard to write Pencil and to read! Left-pointing magnifying glass

#Instead, save the criteria as objects and use them to filter. Or, use reduce() to combine the criteria!

#save criteria as objects and use them to filter

crit1 = drinks.continent== 'Africa'
crit2 = drinks.beer_servings>100
crit3 = drinks.spirit_servings<200
crit4 = drinks.wine_servings>50

In [53]:
drinks_filtered = drinks[crit1 & crit2 & crit3 & crit4]
drinks_filtered


Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
62,Gabon,347,98,59,8.9,Africa
152,Seychelles,157,25,51,4.1,Africa
159,South Africa,225,76,81,8.2,Africa


In [54]:
#use reduce to combine the criteria

from functools import reduce
criteria = reduce(lambda x, y: x & y, [crit1, crit2, crit3, crit4])

In [56]:
drinks[criteria]

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
62,Gabon,347,98,59,8.9,Africa
152,Seychelles,157,25,51,4.1,Africa
159,South Africa,225,76,81,8.2,Africa


In [81]:
#Want to filter a DataFrame that doesn't have a name?

#Use the query() method to avoid creating an intermediate variable!

stocks = pd.read_csv('http://bit.ly/smallstocks', parse_dates = True)
stocks.head()

Unnamed: 0,Date,Close,Volume,Symbol
0,2016-10-03,31.5,14070500,CSCO
1,2016-10-03,112.52,21701800,AAPL
2,2016-10-03,57.42,19189500,MSFT
3,2016-10-04,113.0,29736800,AAPL
4,2016-10-04,57.24,20085900,MSFT


In [73]:
#lets say we want to filter the data frame to show close<100
stocks.groupby('Symbol')['Close'].mean().reset_index()

Unnamed: 0,Symbol,Close
0,AAPL,112.856667
1,CSCO,31.48
2,MSFT,57.433333


In [67]:
stocks.groupby('Symbol').mean().query('Close<100')

Unnamed: 0_level_0,Close,Volume
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
CSCO,31.48,14779830.0
MSFT,57.433333,18667270.0


In [83]:
#Need to refer to a local variable within a query() string? Just prefix it with the @ symbol!
stocks

Unnamed: 0,Date,Close,Volume,Symbol
0,2016-10-03,31.5,14070500,CSCO
1,2016-10-03,112.52,21701800,AAPL
2,2016-10-03,57.42,19189500,MSFT
3,2016-10-04,113.0,29736800,AAPL
4,2016-10-04,57.24,20085900,MSFT
5,2016-10-04,31.35,18460400,CSCO
6,2016-10-05,57.64,16726400,MSFT
7,2016-10-05,31.59,11808600,CSCO
8,2016-10-05,113.05,21453100,AAPL


In [93]:
mean_volume = stocks.Volume.mean()
mean_close = stocks.Close.mean()

In [96]:
stocks.query('Volume < @mean_volume') 

Unnamed: 0,Date,Close,Volume,Symbol
0,2016-10-03,31.5,14070500,CSCO
2,2016-10-03,57.42,19189500,MSFT
5,2016-10-04,31.35,18460400,CSCO
6,2016-10-05,57.64,16726400,MSFT
7,2016-10-05,31.59,11808600,CSCO


In [97]:
stocks.query('Close > @mean_close')

Unnamed: 0,Date,Close,Volume,Symbol
1,2016-10-03,112.52,21701800,AAPL
3,2016-10-04,113.0,29736800,AAPL
8,2016-10-05,113.05,21453100,AAPL


In [104]:
#If you want to use query() on a column name containing a space, just surround it with backticks! (New in pandas 0.25)

#lets rename stock table on the Close column

stock_rename = stocks.rename(columns = {'Close': 'Close of Stock'})
stock_rename

Unnamed: 0,Date,Close of Stock,Volume,Symbol
0,2016-10-03,31.5,14070500,CSCO
1,2016-10-03,112.52,21701800,AAPL
2,2016-10-03,57.42,19189500,MSFT
3,2016-10-04,113.0,29736800,AAPL
4,2016-10-04,57.24,20085900,MSFT
5,2016-10-04,31.35,18460400,CSCO
6,2016-10-05,57.64,16726400,MSFT
7,2016-10-05,31.59,11808600,CSCO
8,2016-10-05,113.05,21453100,AAPL


In [113]:
stock_rename.query('`Close of Stock` > 100')

Unnamed: 0,Date,Close of Stock,Volume,Symbol
1,2016-10-03,112.52,21701800,AAPL
3,2016-10-04,113.0,29736800,AAPL
8,2016-10-05,113.05,21453100,AAPL


In [132]:
#Want to concatenate two string columns?

#Option 1: Use a string method Yarn

ufo = pd.read_csv('http://bit.ly/uforeports')
ufo.head()


Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
3,Abilene,,DISK,KS,6/1/1931 13:00
4,New York Worlds Fair,,LIGHT,NY,4/18/1933 19:00


In [131]:
#Concatenate with str.cat()

ufo.City.str.cat(ufo.State, sep = ', ').head()

0                  Ithaca, NY
1             Willingboro, NJ
2                 Holyoke, CO
3                 Abilene, KS
4    New York Worlds Fair, NY
Name: City, dtype: object

In [135]:
#Option 2: Use plus signs 
#Concatenante with plus sign
ufo_cat = ufo.City + ' ,' + ufo.State

In [136]:
ufo_cat.head()

0                  Ithaca ,NY
1             Willingboro ,NJ
2                 Holyoke ,CO
3                 Abilene ,KS
4    New York Worlds Fair ,NY
dtype: object

In [137]:
#Need to split a string into multiple columns? 

#Use str.split() method, expand=True to return a DataFrame, and assign it to the original DataFrame.

names = pd.DataFrame({'name': ['Abiodun Adetola Joshua', 'Micah Ademuyiwa John'], 'location': ['lagos, Nigeria', 'Port Harcourt, Nigeria']})

In [138]:
names

Unnamed: 0,name,location
0,Abiodun Adetola Joshua,"lagos, Nigeria"
1,Micah Ademuyiwa John,"Port Harcourt, Nigeria"


In [142]:
#Split Name column into first, middle and last name
names[['First name', 'Middle Name', 'Last Name']] = names.name.str.split(' ', expand = True)

names


Unnamed: 0,name,location,First name,Middle Name,Last Name
0,Abiodun Adetola Joshua,"lagos, Nigeria",Abiodun,Adetola,Joshua
1,Micah Ademuyiwa John,"Port Harcourt, Nigeria",Micah,Ademuyiwa,John


In [146]:
#Split location and Keep City
names['City'] = names.location.str.split(', ', expand =True)[0]
names

Unnamed: 0,name,location,First name,Middle Name,Last Name,City
0,Abiodun Adetola Joshua,"lagos, Nigeria",Abiodun,Adetola,Joshua,lagos
1,Micah Ademuyiwa John,"Port Harcourt, Nigeria",Micah,Ademuyiwa,John,Port Harcourt


In [150]:
names['Country'] = names.location.str.split(', ', expand = True)[1]
names

Unnamed: 0,name,location,First name,Middle Name,Last Name,City,Country
0,Abiodun Adetola Joshua,"lagos, Nigeria",Abiodun,Adetola,Joshua,lagos,Nigeria
1,Micah Ademuyiwa John,"Port Harcourt, Nigeria",Micah,Ademuyiwa,John,Port Harcourt,Nigeria


In [151]:
names.drop(columns = 'City')

Unnamed: 0,name,location,First name,Middle Name,Last Name,Country
0,Abiodun Adetola Joshua,"lagos, Nigeria",Abiodun,Adetola,Joshua,Nigeria
1,Micah Ademuyiwa John,"Port Harcourt, Nigeria",Micah,Ademuyiwa,John,Nigeria


In [154]:
names.rename(columns = str.upper)

Unnamed: 0,NAME,LOCATION,FIRST NAME,MIDDLE NAME,LAST NAME,CITY,COUNTRY
0,Abiodun Adetola Joshua,"lagos, Nigeria",Abiodun,Adetola,Joshua,lagos,Nigeria
1,Micah Ademuyiwa John,"Port Harcourt, Nigeria",Micah,Ademuyiwa,John,Port Harcourt,Nigeria


In [157]:
#How to count the no of words in a series
names['word count'] = names.location.str.split().str.len()
names

Unnamed: 0,name,location,First name,Middle Name,Last Name,City,Country,word count
0,Abiodun Adetola Joshua,"lagos, Nigeria",Abiodun,Adetola,Joshua,lagos,Nigeria,2
1,Micah Ademuyiwa John,"Port Harcourt, Nigeria",Micah,Ademuyiwa,John,Port Harcourt,Nigeria,3


In [167]:
#Select columns by data type
df_new.head()

Unnamed: 0,Rank,Change (monthly),Account name,Owner,Followers (millions),Activity,Country,vteTwitter,vteTwitter.1
0,1.0,,@BarackObama,Barack Obama,115.0,Former U.S. president,USA,Not Available,Not Available
1,2.0,,@justinbieber,Justin Bieber,111.0,Musician,CAN,Not Available,Not Available
2,3.0,,@katyperry,Katy Perry,109.0,Musician,USA,Not Available,Not Available
3,4.0,,@rihanna,Rihanna,96.0,Musician and businesswoman,BAR,Not Available,Not Available
4,5.0,,@taylorswift13,Taylor Swift,86.0,Musician,USA,Not Available,Not Available


In [169]:
df_new.select_dtypes(include='number').head()

Unnamed: 0,Rank,Change (monthly),Followers (millions)
0,1.0,,115.0
1,2.0,,111.0
2,3.0,,109.0
3,4.0,,96.0
4,5.0,,86.0


In [173]:
df_new.select_dtypes(include=['object']).head()

Unnamed: 0,Account name,Owner,Activity,Country,vteTwitter,vteTwitter.1
0,@BarackObama,Barack Obama,Former U.S. president,USA,Not Available,Not Available
1,@justinbieber,Justin Bieber,Musician,CAN,Not Available,Not Available
2,@katyperry,Katy Perry,Musician,USA,Not Available,Not Available
3,@rihanna,Rihanna,Musician and businesswoman,BAR,Not Available,Not Available
4,@taylorswift13,Taylor Swift,Musician,USA,Not Available,Not Available


In [174]:
df_new.dtypes

Rank                    float64
Change (monthly)        float64
Account name             object
Owner                    object
Followers (millions)    float64
Activity                 object
Country                  object
vteTwitter               object
vteTwitter.1             object
dtype: object