In [None]:
import pandas as pd
#in order to read data where separator is \t means tab
orders = pd.read_csv('http://bit.ly/chiporders', sep = '\t')
orders.head()

In [None]:
#here the separator is | and headers is not the first row of the table, so pass default indexes
movieusers = pd.read_csv('http://bit.ly/movieusers', sep='|', header = None)
movieusers.head()

In [None]:
#When you want to put column names instead of default intexes
column_names = ['user_id', 'age', 'gender', 'occupation', 'zip_code']
movieusers1 = pd.read_csv('http://bit.ly/movieusers', sep = '|', names = column_names)
movieusers1.head()

In [None]:
#here is a classical csv file
import pandas as pd
ufo = pd.read_csv('http://bit.ly/uforeports')
ufo

In [None]:
ufo.City.head()
#this works as Series too. Instead of ufo['City']! Nice....Although it doesn`t work where a space after the first word comes
#like for example in the column named: Colors Reported...
#Or if that name is a build-in attribute of the DataFrame like ufo.shape

In [None]:
#You can concatenate two Series! Nice!
ufo.City.head() + ', ' + ufo.State.head()

In [None]:
#assign a new Series into the DataFrame
ufo['Location'] = ufo.City + ', ' + ufo.State
ufo.head()

In [None]:
ufo.describe()

In [None]:
movies = pd.read_csv('http://bit.ly/imdbratings')
movies.describe()

In [None]:
# if I want to describe only the genre
movies.genre.describe()
# or I can tell it to include the objects of the DataFrame and see description of all non-numerical types
movies.describe(include = ['object'])

In [None]:
''' Cool tip: press shift+tab (multiple times to get it bigger) inside the parenthesis of a function to see which 
are the arguments it takes'''
print(movies.shape)
print(movies.dtypes)

In [None]:
ufo.columns

In [None]:
#To rename some columns use:
ufo.rename(columns = {'Shape Reported':'Shape_Reported'}, inplace = True)
ufo.columns

In [None]:
# To rename all columns you can do this:
ufo_cols = ['city', 'colors reported', 'shape reported', 'state', 'time', 'location']
ufo.columns = ufo_cols
ufo.head()

In [None]:
# I can pass column names direct when I read the file, but I must explicitly say that header = 0.
#That means that the header has some existing names and I want to change them.
ufo = pd.read_csv('http://bit.ly/uforeports', names = ufo_cols, header = 0)

In [None]:
# When you want to replace all the columns` spaces with underscores and the columns are too many
ufo.columns = ufo.columns.str.replace(' ', '_')

In [None]:
ufo.columns

In [None]:
#There are to way to drop columns. Either this:
ufo.drop(columns = ['location'], inplace = True)
ufo.columns

In [None]:
# Or this: when the label is passed positionally you must say it lives on axis = 1
# (columns); axis = 0 would look for a row label instead.
# At this point the column names are lowercase with underscores and 'location' has
# already been dropped by the previous cell, so 'Location' would raise a KeyError.
# Drop another column that still exists to demonstrate the syntax.
ufo.drop('colors_reported', axis = 1, inplace = True)
ufo.columns

In [None]:
# for more than one columns you can write:
movieusers1.drop(['gender', 'occupation'], axis = 1, inplace = True)
movieusers1.columns

In [None]:
# Or you can do this:
movieusers1.drop(columns = ['age', 'zip_code'], inplace = True)

In [None]:
movieusers1.columns

In [None]:
#To drop rows there are again two options. Either the one below or drop([1,2,3,4], axis = 0, inplace = True)
movieusers1.drop(index = [1,4], inplace = True)
movieusers1.head()

In [None]:
#To sort values in a DataFrame
movies.title.sort_values()
#or movies['title'].sort_values()

In [None]:
#But you can define the order to descending too..With the sort method the underlying data are not affected as with drop etc.
movies.title.sort_values(ascending = False)

In [None]:
# The above was to sort the Series but you can sort the whole DataFrame by the Series
movies.sort_values('title', ascending = False)

In [None]:
#You can sort the dataframe by two columns too. First the one and within that the second
movies.sort_values(['star_rating', 'genre'], ascending = False)

In [None]:
#To filter in a dateframe to a given condition
movies[movies.duration >= 200]

In [None]:
#To filter the duration for example but take the genre. In fact take the genre where the duration is >= 200
movies.loc[movies.duration >= 200, 'genre']

In [None]:
#Filter to multiple contitions in pandas, use & for and, | for or !!! Use parenthesis too. YOU DONT NEED LOC
movies[(movies.duration >= 200) & ((movies.genre == 'Action') | (movies.genre == 'Drama'))]

In [None]:
# You can use the isin operator too 
movies[(movies.genre.isin(['Action', 'Horror'])) & (movies.star_rating >= 8)]

In [None]:
#To read only these two columns from the file. You can use the index too: usecols = [0,4]
movies = pd.read_csv('http://bit.ly/imdbratings', usecols = ['genre', 'duration'])

In [None]:
# Iterate through a DataFrame row by row. Each `row` is a Series, so its values can be
# read as attributes named after the columns. The ufo columns are lowercase at this
# point (they were renamed when the file was re-read with names = ufo_cols), hence
# row.state / row.city. Only the first rows are iterated so the cell does not print
# every record of the file.
for index, row in ufo.head(10).iterrows():
    print(index, row.state, row.city)

In [None]:
drinks = pd.read_csv('http://bit.ly/drinksbycountry')

In [None]:
# How to drop the non-numeric columns
import numpy as np
drinks_new = drinks.select_dtypes(include=[np.number])
drinks_new

In [None]:
# Column means. numeric_only = True skips the string columns (country, continent);
# without it recent pandas versions raise a TypeError instead of silently dropping them.
drinks.mean(numeric_only = True)

In [None]:
# To make the city names uppercase with the .str accessor.
# The column is called 'city' here because ufo was re-read with lowercase names above.
ufo.city.str.upper().head()

In [None]:
# To keep only the rows whose state contains a given substring.
# The column is called 'state' here because ufo was re-read with lowercase names above.
ufo[ufo.state.str.contains('MN')]

In [None]:
drinks.dtypes

In [None]:
#Change the datatype of a dataframe and replace it
drinks['beer_servings'] = drinks.beer_servings.astype(float)

In [None]:
drinks.dtypes

In [None]:
# To set the datatype of a column while reading the file.
# beer_servings is read as an integer by default, so float is requested here to
# actually show a change (same result as the astype(float) conversion above).
drinks = pd.read_csv('http://bit.ly/drinksbycountry', dtype = {'beer_servings':float})

In [None]:
drinks.dtypes

In [None]:
chip = pd.read_csv('http://bit.ly/chiporders', sep = '\t')

In [None]:
chip.head()

In [None]:
# Strip the dollar sign, convert to float and average the prices.
# regex = False makes '$' a literal character; as a regular expression '$' is the
# end-of-string anchor, and the default of `regex` differs between pandas versions.
chip.item_price.str.replace('$', '', regex = False).astype(float).mean()

In [None]:
#If you want to use 0 and 1 in machine learning model which represents the False and True
chip.item_name.str.contains('Chicken').astype(int).head()

In [None]:
#how to groupby something you are looking for
drinks.groupby('continent').beer_servings.mean()

In [None]:
drinks.groupby('continent').beer_servings.max()

In [None]:
drinks.groupby('continent').beer_servings.min()

In [None]:
drinks.groupby('continent').beer_servings.agg(['count', 'min', 'max', 'mean'])

In [None]:
# Of course you can calculate an aggregate for all columns grouped by one column.
# numeric_only = True leaves out the string column (country), which cannot be averaged
# and makes recent pandas versions raise a TypeError.
drinks.groupby('continent').mean(numeric_only = True)

In [None]:
#magic function in iPython
%matplotlib inline

In [None]:
# Bar chart of the per-continent means; numeric_only = True skips the string column
# (country) that cannot be averaged.
drinks.groupby('continent').mean(numeric_only = True).plot(kind='bar')

In [None]:
# Explore a series
movies = pd.read_csv('http://bit.ly/imdbratings')

In [None]:
movies.genre.describe()

In [None]:
movies.genre.value_counts()

In [None]:
#To get the appearence percentages of every genre
movies.genre.value_counts(normalize = True)

In [None]:
#The above is a series so you can use series methods too
movies.genre.value_counts().head(2)

In [None]:
movies.genre.unique()

In [None]:
#To see the number of unique values in the dataframe
movies.genre.nunique()

In [None]:
#How to create a crosstab with the dataframe fields
pd.crosstab(movies.genre, movies.content_rating)

In [None]:
movies.duration.plot(kind = 'hist')

In [None]:
movies.genre.value_counts().plot(kind = 'bar')

In [None]:
ufo = pd.read_csv('http://bit.ly/uforeports')

In [None]:
ufo.tail()

In [None]:
ufo.isnull().tail()

In [None]:
ufo.notnull().tail()

In [None]:
#To get the number of missing values in the dataframe
ufo.isnull().sum()

In [None]:
#To get the exact rows with the missing values in a series
ufo[ufo.City.isnull()]

In [None]:
ufo.dropna(how = 'all').shape

In [None]:
ufo.dropna(subset = ['State','Time'], how = 'all').shape

In [None]:
#To the below command missing values are excluded by default!!!
ufo['Shape Reported'].value_counts()

In [None]:
#This is how you include the NaN values to be counted
ufo['Shape Reported'].value_counts(dropna = False)

In [None]:
# Change the NaN values to a value.
# Assign the result back to the column: calling fillna(inplace = True) on a selected
# column is chained assignment and is not guaranteed to modify the DataFrame itself.
ufo['Shape Reported'] = ufo['Shape Reported'].fillna(value = 'COULD NOT TELL')

In [None]:
ufo['Shape Reported'].value_counts()

In [None]:
drinks.index

In [None]:
drinks.columns

In [None]:
#If you set the index as country for example and not as a default number for each row, you can select a value by this index.
drinks.set_index('country', inplace = True)
drinks.loc['Brazil','beer_servings']

In [None]:
drinks.head()

In [None]:
drinks.index

In [None]:
#To erase the name country from the position above
drinks.index.name = None
drinks.head()

In [None]:
#To reset the indices as before and fetch the name country again as datacolumn
drinks.index.name = 'country'
drinks.head()

In [None]:
drinks.reset_index(inplace = True)
drinks.head()

In [None]:
drinks.describe()

In [None]:
drinks.describe().loc['25%','spirit_servings']

In [None]:
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks.set_index('country', inplace = True)
drinks.head()

In [None]:
drinks.index.name = None
drinks.head()

In [None]:
drinks.continent.value_counts().values

In [None]:
drinks.continent.value_counts()['Africa']

In [None]:
drinks.continent.value_counts().sort_values()

In [None]:
#Here you take in alphabetical order
drinks.continent.value_counts().sort_index()

In [None]:
# Population per country as an integer Series; the index labels are country names so
# the Series can be aligned with a DataFrame that is indexed by country.
# The values are written as integers rather than numeric strings coerced via dtype.
people = pd.Series([3000000, 40000, 35678], index = ['Albania', 'Andorra', 'Angola'], dtype = int, name = 'Population')
people
drinks.beer_servings * people

In [None]:
#To concatenate rows or columns of a dataframe to a series use axis to tell where you want the series to be concat
pd.concat([drinks, people], sort = False, axis = 1).head()

In [None]:
ufo.loc[:, 'City'].head()

In [None]:
ufo.loc[:4,['City','State']]

In [None]:
ufo.loc[:4,'City':'State']

In [None]:
ufo.loc[ufo.City == 'Ithaca', :]

In [None]:
ufo.iloc[:4, :4]

In [None]:
drinks.head()

In [None]:
drinks.info()

In [None]:
drinks.info(memory_usage = 'deep')

In [None]:
drinks.memory_usage(deep = True)

In [None]:
drinks.memory_usage(deep = True).sum()

In [None]:
sorted(drinks.continent.unique())

In [None]:
#To store continents as int in order to save space in memory and speed up computations like groupby
# You CANNOT do that with Country, because every row has a unique country, so you will end up with more memory_usage!!!!
drinks['continent'] = drinks.continent.astype('category')

In [None]:
drinks.memory_usage(deep = True)

In [None]:
drinks.continent.cat.codes.head()

In [None]:
# Small product table: an ID column and a free-text quality rating per product
prod = pd.DataFrame(
    data = {
        'ID': [100, 200, 300, 400],
        'quality': ['good', 'very good', 'good', 'excellent'],
    }
)
prod

In [None]:
prod.sort_values('quality')

In [None]:
# If you turn quality into an ORDERED category, sorting and comparisons (e.g. > 'good')
# follow the logical order given below instead of alphabetical order.
# astype('category', categories = ..., ordered = ...) is no longer accepted by pandas;
# the categories and their order are described with a CategoricalDtype instead.
quality_dtype = pd.api.types.CategoricalDtype(categories = ['good', 'very good', 'excellent'], ordered = True)
prod['quality'] = prod.quality.astype(quality_dtype)

In [None]:
prod.sort_values('quality')

In [None]:
prod.loc[prod.quality > 'good', : ]

In [None]:
prod.quality

# The Titanic Kaggle dataframe

In [None]:
train = pd.read_csv('http://bit.ly/kaggletrain')
train.head()

In [None]:
feature_col = ['Pclass','Parch']
X = train.loc[:, feature_col]
X.shape

In [None]:
y = train.Survived
y.shape

In [None]:
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression(solver = 'liblinear')
logreg.fit(X, y)

In [None]:
test = pd.read_csv('http://bit.ly/kaggletest')
test.head()

In [None]:
X_new = test.loc[:, feature_col]
X_new.shape

In [None]:
new_pred_class = logreg.predict(X_new)
new_pred_class

In [None]:
#To quarantee the Passenger Id will always be the first column I must declare it as index. Apparently in dicts is not guaranted
pd.DataFrame({'PassengerId':test.PassengerId, 'Survived': new_pred_class}).set_index('PassengerId').to_csv('kaggle.csv')

In [None]:
#To save a pandas object like a DataFrame to disk you must pickled it(that`s the name)
train.to_pickle('pickled.pkl')

In [None]:
pd.read_pickle('pickled.pkl')

# More Pandas tips

In [None]:
#To get random sample from a file. Everytime you run it you take different rows
ufo.sample(n=3)

In [None]:
#To stabilize the random sample you get every time:
ufo.sample(n=3, random_state = 42)

In [None]:
#To get a percentage fraction of a stabilized random sample:
ufo.sample(frac = 0.75, random_state = 99)

In [None]:
#How to create dummy variables with pandas
train['Sex_male'] = train.Sex.map({'female':0, 'male': 1})
train.head()

In [None]:
#You can do it with top level function
pd.get_dummies(train.Sex)

In [None]:
#When you create dummies you want k - 1 columns. E.g. you want only the female or the male column here
pd.get_dummies(train.Sex, prefix = 'Sex').iloc[:,1:]

In [None]:
#here is an example with more than two columns
pd.get_dummies(train.Embarked, prefix = 'Embarked')

In [None]:
embarked_dummies = pd.get_dummies(train.Embarked, prefix = 'Embarked').iloc[:,1:]
train = pd.concat([train, embarked_dummies],axis = 1)

In [None]:
train.head()

In [None]:
#To create dummies for multiple columns and drop the first columns of the dummies columns:
train = pd.get_dummies(train, columns=['Sex', 'Embarked'], drop_first = True)

In [None]:
train.to_csv('dummies.csv')

In [None]:
ufo = pd.read_csv('http://bit.ly/uforeports')
ufo.head()

In [None]:
#Let`s say we want to see only the hour. Find the dtype and since it`s a string, slice will do it
ufo.dtypes

In [None]:
ufo.Time.str.slice(-5, -3).astype(int).head() #Slice 5 to three characters from the end


In [None]:
#convert the time column to datetime wiht pandas
ufo['Time'] = pd.to_datetime(ufo.Time)
ufo.dtypes

In [None]:
#Now I can take only the hour easily or any other attribute of datatime
ufo.Time.dt.hour.head()

In [None]:
# Name of the weekday of every sighting. The old .dt.weekday_name attribute has been
# removed from pandas; .dt.day_name() is its replacement.
ufo.Time.dt.day_name().head()

In [None]:
ufo.Time.dt.dayofyear.head()

In [None]:
# Convert a given string to a Timestamp. The string is day/month/year, so the format
# is stated explicitly instead of letting pandas guess between day-first and month-first.
birthday = pd.to_datetime('19/11/1983', format = '%d/%m/%Y')
birthday.day_name()

In [None]:
#Now I can use these two timestamps as comparison. The colon at the end tell pandas to bring back all the rows
ufo.loc[ufo.Time.dt.year == birthday.year, : ].head()

In [None]:
#you can perform calculations. The one below is called deltatime object
ufo.Time.max() - ufo.Time.min()

In [None]:
#And you can see only the day for example
(ufo.Time.max() - ufo.Time.min()).days

In [None]:
#magic function in iPython
%matplotlib inline

In [None]:
#How to show a basic plot of the sightings through years
ufo['Year'] = ufo.Time.dt.year
ufo.head()

In [None]:
ufo.Year.value_counts().sort_index().plot()

In [None]:
# read a dataset of movie reviewers (modifying the default parameter values for read_table)
user_cols = ['user_id', 'age', 'gender', 'occupation', 'zip_code']
users = pd.read_csv('http://bit.ly/movieusers', sep='|', header=None, names=user_cols, index_col = 'user_id')
users.head()

In [None]:
users.shape

In [None]:
#to see the duplicates in the dataframe. Returns true if one of the previous rows is identical
users.zip_code.duplicated()

In [None]:
#I can sum the True duplicates to see how much are they
users.zip_code.duplicated().sum()

In [None]:
#To see how many rows is duplicates to the previous rows in the whole dataframe
users.duplicated().sum()

In [None]:
#To see these 7 rows
users.loc[users.duplicated(), :]

In [None]:
users.loc[users.duplicated(keep = 'first'), :] #keep the first duplicated rows that you find and give me the ones below them

In [None]:
users.loc[users.duplicated(keep = 'last'), :] #keep the last rows you will find and give me the ones above them

In [None]:
# all of the duplicated rows. That means both 7 previous and the ones below them
users.loc[users.duplicated(keep = False), :]

In [None]:
# now we can drop duplicates and we can do it inplace if we like
users.drop_duplicates(keep = 'first').shape

In [None]:
#when we want to identify as duplicates those that are in a particular column of columns
users.duplicated(subset = ['age', 'zip_code']).sum()

In [None]:
users.drop_duplicates(subset = ['age', 'zip_code']).shape

In [None]:
#counting missing values
movies = pd.read_csv('http://bit.ly/imdbratings')
movies.head()

In [None]:
movies.content_rating.isnull().sum()

In [None]:
#if we want to see them
movies[movies.content_rating.isnull()]

In [None]:
#count the values in content_rating column
movies.content_rating.value_counts()

In [None]:
#We want to replase the 'NOT RATED' rows with nulls...use loc to not take a 'SettingWithCopyWarning'
import numpy as np
movies.loc[movies.content_rating == 'NOT RATED', 'content_rating'] = np.nan
movies.content_rating.isnull().sum()

In [None]:
#although in the below situation we still take this warning. That is because pandas is not sure if this is a copy of the df
#or the actual df. In this case we must create a copy explicitly
top_movies = movies.loc[movies.star_rating >= 9, :]
top_movies

In [None]:
# Let's say we want to change the duration of the row labelled 0 because we know it is wrong.
# top_movies was selected from movies with .loc in the previous cell, without an explicit
# copy. According to the notes around this cell, this assignment still triggers the
# SettingWithCopyWarning; it is kept here on purpose to show the problem.
top_movies.loc[0, 'duration'] = 150

In [None]:
#Although we have used loc we take the warning. Use the copy() to bypass
top_movies = movies.loc[movies.star_rating >= 9, :].copy()
top_movies.loc[0, 'duration'] = 150

In [None]:
top_movies

In [None]:
#this won`t affect the df for sure
movies.head()

In [None]:
#how we change display options in pandas
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks

In [None]:
#how many rows are being displayed as default?
pd.get_option('display.max_rows')

In [None]:
#This means it displays the 30 first and the 30 last rows. What if I want to see all of them?
pd.set_option('display.max_rows', None)

In [None]:
drinks

In [None]:
#to return to the previous default option
pd.reset_option('display.max_rows')

In [None]:
drinks

In [None]:
#to read the documentation on pandas options without having to search the web
pd.describe_option()

In [None]:
pd.describe_option('rows')

In [None]:
#to reset everything to the default display option. The warning is about some default display that have been deprecated
#thus can be ignored
pd.reset_option('all')

In [None]:
# Build a DataFrame from scratch: the dict keys become the column names and
# `index` supplies the row labels
df = pd.DataFrame(
    data = {
        'id': [100, 101, 102],
        'colour': ['red', 'green', 'red'],
    },
    index = list('abc'),
)
df

In [None]:
#you can create a df from a list of lists
df1 = pd.DataFrame([[100, 'red'],[101, 'green'],[102, 'red']], columns = ['id', 'colour'])
df1

In [None]:
#How to convert a numpy array to a dataframe
#create a numpy array with 4 rows, two columns with random values between 0 and 1
import numpy as np
arr = np.random.rand(4, 2)
arr

In [None]:
df2 = pd.DataFrame(arr, columns = ['one', 'two'])
df2

In [None]:
#create a df from scratch and use np.arrays for values
#np.arange(100,110,1) I want an array from 100 to 110 and I want every value (that will be 10 at the end)
#np.random.randint(60,101,10) i want an array with values from 60 with 60 to 101 without 101 and I want 10 of them
df3 = pd.DataFrame({'students': np.arange(100,110,1), 'test': np.random.randint(60,101,10)})
df3

In [None]:
#i can declare an index too
df3 = pd.DataFrame({'students': np.arange(100,110,1), 'test': np.random.randint(60,101,10)}).set_index('students')
df3

In [None]:
# A new named Series whose index labels ('b', 'c') decide which rows of a DataFrame
# it lines up with when it is added as a column
s = pd.Series(
    data = ['dog', 'cat'],
    index = list('bc'),
    name = 'animals',
)
s

In [None]:
pd.concat([df, s], axis = 1, sort = False)

In [None]:
train = pd.read_csv('http://bit.ly/kaggletrain')
train.head()

In [None]:
#How to use the map method. You can do more with it but this is the more used one case
train['Sex_num'] = train.Sex.map({'female':1, 'male':0})
train.loc[:4, ['Sex', 'Sex_num']]

In [None]:
#what apply do as a series method and as a df method
#let`s say we want to find the length of each row in the Name column
train['Name_length'] = train.Name.apply(len)
train.loc[0:4, ['Name', 'Name_length']]

In [None]:
#another example how to use apply. I want to round up the Fare column...I will do it with the np.ceil function
import numpy as np
train['Fare_ceil'] = train.Fare.apply(np.ceil)
train.loc[:4, ['Fare', 'Fare_ceil']]

In [None]:
# Another example: take one piece of every value in the Name column with a custom function.
# Splitting on ',' turns every name into a list; position 0 is the part before the first comma
# (presumably the family name, given how this dataset writes names -- TODO confirm).
train.Name.str.split(',')
def get_element(my_list, position):
    """Return the element of `my_list` found at index `position`.

    Used with Series.apply below, where `my_list` is the list produced by
    splitting one name and `position` is passed as a keyword argument.
    """
    return my_list[position]
train.Name.str.split(',').apply(get_element, position = 0)

In [None]:
#You can do the same with a lambda function
train.Name.str.split(',').apply(lambda x: x[0])

In [None]:
#the apply method as df method
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks.head()

In [None]:
#I want to find the max number for all the rows of some columns
drinks.loc[:, 'beer_servings':'wine_servings'].apply(max, axis = 0)

In [None]:
#I want the max value for each row for each column
drinks.loc[:, 'beer_servings':'wine_servings'].apply(max, axis = 1)

In [None]:
# I want to know which column holds the largest value in each row.
# idxmax(axis = 1) returns the column LABEL of the row maximum; applying np.argmax
# returns the integer position instead on current pandas versions.
drinks.loc[:, 'beer_servings':'wine_servings'].idxmax(axis = 1)

In [None]:
# applymap as a DataFrame method: it calls the function (here float) on every single element
# of the selected columns. The result is only displayed; nothing is assigned back in this cell.
# NOTE(review): recent pandas releases deprecate DataFrame.applymap in favour of DataFrame.map -- confirm
# against the pandas version in use before changing the call.

drinks.loc[:, 'beer_servings':'wine_servings'].applymap(float)

In [None]:
stocks = pd.read_csv('http://bit.ly/smallstocks')
stocks.head()

In [None]:
stocks.index

In [None]:
stocks.groupby('Symbol').Close.mean()

In [None]:
#This is a series with a multiindex
ser = stocks.groupby(['Symbol', 'Date']).Close.mean()
ser

In [None]:
ser.index

In [None]:
#you can unstack a multiindex series and will become a df
ser.unstack()

In [None]:
#You can construct the exact df with the pivot_table function
df = stocks.pivot_table(values = 'Close', index = 'Symbol', columns = 'Date')
df

In [None]:
#lets say i want from the ser all the AAPL data. You can do that with loc for a multiindex series
ser.loc['AAPL']

In [None]:
ser.loc['AAPL', '2016-10-03']

In [None]:
ser.loc[:, '2016-10-03']

In [None]:
#and if you have unstacked the series to a df. All of the above can be written for the df.For example:
df.loc['AAPL', '2016-10-03']

In [None]:
#to set a multiindex
stocks.set_index(['Symbol', 'Date'], inplace = True)
stocks

In [None]:
stocks.index

In [None]:
#better sort the above
stocks.sort_index(inplace = True)
stocks

In [None]:
#Now if I want to see the values of the first row
stocks.loc[('AAPL', '2016-10-03'), :]

In [None]:
#or only the Close column`s value
stocks.loc[('AAPL', '2016-10-03'), 'Close']

In [None]:
#To take the values for two Symbols
stocks.loc[(['AAPL', 'MSFT'], '2016-10-03'), 'Close']

In [None]:
#or to see the values for multiple dates. It a two list in a tuple thing and everything in a list.....hehe
stocks.loc[(['AAPL', 'MSFT'], ['2016-10-03','2016-10-04']), 'Close']

In [None]:
#But if I want the values for two dates for all the Symbols, I have to go with slice(None)
stocks.loc[(slice(None), ['2016-10-03','2016-10-04']), 'Close']

In [None]:
ufo = pd.read_csv('http://bit.ly/uforeports')
ufo.head()

In [None]:
#to see how many unique cities are there in the df
ufo.City.nunique()

In [None]:
ufo.shape

In [None]:
ufo.Time.nunique()

In [None]:
#You can merge two dfs into one on a common id column: pd.merge(df1, df2). There are no example dfs here to demonstrate it.
#If the id columns have different names in the two dfs, you can use: pd.merge(df1, df2, left_on = ' ', right_on = ' ')
#If you join on the index of one df and a column of the other, use: pd.merge(df1, df2, left_index = True, right_on = ' '); in this situation
#the index of the right df takes the place of the index of the merged df
#If you want to join on the index of both dfs: pd.merge(df1, df2, left_index = True, right_index = True); in this case the index
#of the merged df is the left df`s index
#You can decide how the dfs are merged: pd.merge(df1, df2, how = 'inner') or 'outer' or 'left' or 'right'

In [None]:
pd.__version__

In [None]:
# create an example DataFrame
df = pd.DataFrame([[12, 25, 2017, 10], [1, 15, 2018, 11]],
                  columns=['month', 'day', 'year', 'hour'])
df

In [None]:
# new: create a datetime column from the entire DataFrame
pd.to_datetime(df)

In [None]:
#if the df has not only these columns.Create a datetime column from the subset of the df
pd.to_datetime(df[['month', 'day', 'year', 'hour']])

In [None]:
#with this function you can overwrite the index
df.index = pd.to_datetime(df[['month', 'day', 'year', 'hour']])
df

In [None]:
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
drinks.head()

In [None]:
drinks.dtypes