## stack()

In [1]:
# Ref: https://www.w3resource.com/pandas/dataframe/dataframe-stack.php
import os

import numpy as np
import pandas as pd
# Small frame with ordinary (single-level) columns: one row per animal.
df_single_level_cols = pd.DataFrame(
    {'weight': [0, 3], 'height': [2, 4]},
    index=['deer', 'monkey'],
)
df_single_level_cols


Unnamed: 0,weight,height
deer,0,2
monkey,3,4


In [2]:
# Stacking moves the column labels into an inner index level, so a frame with
# single-level columns becomes a Series keyed by (row label, column label).
stacked_single = df_single_level_cols.stack()
for view in (type(stacked_single), stacked_single, stacked_single.index, stacked_single.values):
    print(view)


<class 'pandas.core.series.Series'>
deer    weight    0
        height    2
monkey  weight    3
        height    4
dtype: int64
MultiIndex([(  'deer', 'weight'),
            (  'deer', 'height'),
            ('monkey', 'weight'),
            ('monkey', 'height')],
           )
[0 2 3 4]


In [None]:
# Two-level labels (measure, unit), used first as columns and then as a row index.
unit_labels = [('weight', 'kg'), ('weight', 'pounds')]
multicol1 = pd.MultiIndex.from_tuples(unit_labels)

# Frame 1: animals as rows, the MultiIndex as columns.
df_multi_level_cols1 = pd.DataFrame(
    data=[[3, 4], [4, 5]],
    index=['deer', 'monkey'],
    columns=multicol1,
)
print(df_multi_level_cols1)

# Frame 2: the same MultiIndex as the row index, animals as columns.
df_multi_level_cols2 = pd.DataFrame(
    data=[[13, 14], [14, 15]],
    index=multicol1,
    columns=['deer', 'monkey'],
)
print(df_multi_level_cols2)

In [None]:
# Move the innermost column level (the unit: kg / pounds) into the row index.
df_multi_level_cols1.stack(level=-1)

In [None]:
# Stack both column levels at once, so no column labels remain.
df_multi_level_cols1.stack(level=[0, 1])

In [None]:
# Each movie carries a Python list of genres inside a single cell.
df_m = pd.DataFrame({
    'title': ['Titanic', 'K3G'],
    'genres': [['animation', 'comedy', 'drama'], ['drama', 'comedy', 'thriller']],
})
df_m

In [None]:
# Row by row, turn the genre list into a Series so that each list position
# becomes its own column in the result.
s = df_m.apply(lambda row: pd.Series(row['genres']), axis='columns')
s

In [None]:
# Fold the positional genre columns into an inner index level: one entry per (movie, position).
s.stack(level=-1)

In [None]:
# Discard the positional inner level so every genre is keyed only by its movie's row label.
s.stack().reset_index(drop=True, level=1)

In [3]:
# Movies whose genre lists have different lengths (2, 2 and 3 entries).
df1 = pd.DataFrame({
    'title': ['titanic', 'K3G1', 'KFG'],
    'genres': [['action', 'comedy'], ['drama', 'thriller'], ['A', 'B', 'C']],
})
print(df1)

# Selecting a single column already yields a Series, so no pd.Series(...) wrapper
# is needed; showing the type makes that explicit.
type(df1['genres'])

     title             genres
0  titanic   [action, comedy]
1     K3G1  [drama, thriller]
2      KFG          [A, B, C]


pandas.core.series.Series

In [6]:
# Expand each genre list into positional columns; rows with shorter lists are
# left with missing values in the trailing columns.
p = df1.apply(lambda row: pd.Series(row['genres']), axis='columns')
p

Unnamed: 0,0,1,2
0,action,comedy,
1,drama,thriller,
2,A,B,C


In [7]:
# dropna=False keeps the (row, position) entries whose value is missing.
p.stack(level=-1, dropna=False)

0  0      action
   1      comedy
   2         NaN
1  0       drama
   1    thriller
   2         NaN
2  0           A
   1           B
   2           C
dtype: object

In [8]:
# dropna=True omits the entries whose value is missing.
p.stack(level=-1, dropna=True)

0  0      action
   1      comedy
1  0       drama
   1    thriller
2  0           A
   1           B
   2           C
dtype: object

In [9]:
# One row per (movie, genre): stack, then drop the positional inner index level
# so that only the movie's row label remains.
p.stack().reset_index(drop=True, level=1)

0      action
0      comedy
1       drama
1    thriller
2           A
2           B
2           C
dtype: object

## apply()

In [None]:
# Ref: pandas docs
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html?highlight=apply#pandas.DataFrame.apply
# Three identical rows: A is always 4, B is always 9.
df = pd.DataFrame({'A': [4] * 3, 'B': [9] * 3})
df

In [None]:
# Row-wise apply (axis=1 / 'columns'): the function receives one row at a time,
# so the values are summed across the columns and the result has one entry per row.
df.apply(np.sum, axis='columns')

In [None]:
# Column-wise apply (axis=0 / 'index'): the function receives one column at a time,
# so the values are summed down the rows and the result has one entry per column.
df.apply(np.sum, axis='index')

In [None]:
# result_type='expand' turns the list-like returned for each row into separate columns.
df.apply(lambda row: [1, 2, 3], result_type='expand', axis=1)

In [None]:
# Returning a Series from a row-wise function behaves like result_type='expand':
# the labels of the returned Series become the columns of the resulting DataFrame.
# axis=1 is required for that. With the default axis=0 the function runs once per
# column and the labels A-D become the row index instead, which also leaves the
# frame with only two columns per row.
s = df.apply(lambda row: pd.Series([1, 2, 3, 4], index=['A', 'B', 'C', 'D']), axis=1)
s

In [None]:
# result_type='broadcast' keeps the shape and labels of the original frame:
# whatever the function returns (list-like or scalar) is broadcast along the axis.

s.apply(lambda row: [9, 8, 7, 6], result_type='broadcast', axis=1)

In [None]:
# A scalar return value is broadcast to every cell of the row.
s.apply(lambda row: 3, result_type='broadcast', axis=1)

## join()

In [None]:
# Ref: pandas official docs
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.join.html?highlight=join#pandas.DataFrame.join
# Caller frame: keys K0..K5 paired with values A0..A5.
df_j = pd.DataFrame({
    'key': [f'K{i}' for i in range(6)],
    'A': [f'A{i}' for i in range(6)],
})
df_j

In [None]:
# Second frame: only the first three keys, paired with values B0..B2.
other = pd.DataFrame({
    'key': [f'K{i}' for i in range(3)],
    'B': [f'B{i}' for i in range(3)],
})
other

In [None]:
# Join the two frames on their indexes; how='left' is the default, spelled out here.
# Both frames have a 'key' column, so suffixes are needed to tell the two apart.
df_j.join(other, how='left', lsuffix='_caller', rsuffix='_other')

In [None]:
# how='right' keeps the index of `other`, so only its rows appear in the result.
df_j.join(other, lsuffix='_caller', rsuffix='_other', how='right')

In [None]:
# Swap the roles: `other` is now the caller (left side) of the default left join.
other.join(df_j, how='left', rsuffix='_dfj', lsuffix='_caller')

In [None]:
# Index both frames by 'key' so the join aligns on key values rather than row position;
# no suffixes are needed because 'key' is no longer a column.
df_j.set_index(keys='key').join(other.set_index(keys='key'))

In [None]:
# Redefine the caller frame, this time with repeated keys (K0 twice, K1 three times).
df_j = pd.DataFrame({
    'key': ['K0', 'K1', 'K1', 'K3', 'K0', 'K1'],
    'A': [f'A{i}' for i in range(6)],
})
df_j

In [None]:
# Match the caller's 'key' column against the index of `other` (set to its 'key' values).
df_j.join(on='key', other=other.set_index('key'))

In [None]:
# Ref: https://www.skytowner.com/explore/splitting_dictionary_into_separate_columns_in_pandas_dataframe
# Column 'A' holds one dict per row; the two dicts have different keys.
df = pd.DataFrame({
    "A": [
        {"a": 3, "d": 8},
        {"b": 4, "c": 5},
    ],
    "B": [6, 7],
})
df

In [None]:
# Expand each dict in column 'A' into its own columns, one column per dict key.
# Series.apply takes no axis argument: pd.Series is simply called on every element.
df['A'].apply(pd.Series)

In [None]:
# Append the columns expanded from the dicts in 'A' to the right of the existing frame.
expanded_a = df['A'].apply(pd.Series)
df = pd.concat([df, expanded_a], axis='columns')
df

In [None]:
# Remove the original dict column. Rebinding instead of inplace=True, together
# with errors='ignore', lets this cell be re-run without raising a KeyError.
df = df.drop(columns='A', errors='ignore')

In [None]:
# Show the frame with its columns ordered by label; `df` itself is not modified.
df.sort_index(axis='columns')

In [None]:
# Both columns now mix a dict with a plain number.
df = pd.DataFrame({
    "A": [{"a": 3, "d": 8}, 5],
    "B": [7, {"b": 4, "c": 6}],
})
df

In [None]:
# Expand each mixed column separately by calling pd.Series on every element.
A, B = (df[label].apply(pd.Series) for label in ('A', 'B'))

In [None]:
# Place the two expanded frames side by side, aligned on the row index.
pd.concat(objs=[A, B], axis='columns')

## literal_eval

In [180]:
# 'genres' deliberately mixes a list of dicts (row 0) with a bare dict (row 1).
df_le = pd.DataFrame({
    "title": ["titanic", "K3G"],
    "genres": [
        [{'id': 16, 'name': 'Animation'}, {'id': 20, 'name': 'Adventure'}],
        {'id': 10749, 'name': 'Romance'},
    ],
})

# literal_eval only parses strings. These cells already hold real Python objects
# (a list and a dict), so df_le['genres'].apply(literal_eval) raises an error
# instead of converting anything.
print(type(df_le["genres"][0]))
print(type(df_le["genres"][1]))


<class 'list'>
<class 'dict'>


In [179]:
from ast import literal_eval 

# A string that merely looks like a Python list of dicts; literal_eval parses it
# into the real list object.
a = (
    "[{'id': 16, 'name': 'Animation'},"
    "{'id': 20, 'name': 'Adventure'},"
    "{'id': 10749, 'name': 'Romance'} ]"
)
print(type(a))
type(literal_eval(a))

<class 'str'>


list

In [182]:
# Location of the movies metadata CSV. The original machine-specific path stays
# the default; set the MOVIES_METADATA_CSV environment variable to run the
# notebook on another machine without editing this cell.
MOVIES_METADATA_CSV = os.environ.get(
    'MOVIES_METADATA_CSV',
    'D:\\Shubhi\\DataScience\\MoviesRecommendation\\MovieLens_Dataset\\movies_metadata_1.csv',
)
movies = pd.read_csv(MOVIES_METADATA_CSV)
# read_csv already returns a DataFrame; the wrapper is kept so that `moviesDF`
# remains a separate object from `movies`, exactly as before.
moviesDF = pd.DataFrame(movies)
moviesDF.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{""id"": 10194, ""name"": ""Toy Story Collection"", ...",30000000,"[{""id"": 16, ""name"": ""Animation""}, {""id"": 35, ""...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy""s toys live happily in his ...",...,30-10-1995,373554033,81,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,,Toy Story,False,7.7,5415
1,False,,65000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,15-12-1995,262797249,104,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Cancelled,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413
2,False,"{""id"": 119050, ""name"": ""Grumpy Old Men Collect...",0,"{""id"": 10749, ""name"": ""Romance""}",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,22-12-1995,0,101,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92
3,False,,16000000,"[{""id"": 35, ""name"": ""Comedy""}, {""id"": 18, ""nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,22-12-1995,81452156,127,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Postponed,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34
4,False,"{""id"": 96871, ""name"": ""Father of the Bride Col...",0,"[{""id"": 35, ""name"": ""Comedy""}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,10-02-1995,76578911,106,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Just When His World Is Back To Normal... He""s ...",Father of the Bride Part II,False,5.7,173


In [184]:
# Straight from the CSV, every 'genres' cell is a plain string.
print(type(moviesDF['genres'][0]))

# Parse the column once and reuse the result: every apply(literal_eval) pass
# walks all rows, so repeating it for each lookup is wasted work.
genres_parsed = moviesDF['genres'].apply(literal_eval)

# After parsing, row 2 (a single JSON-like object) is a dict and row 1
# (a bracketed sequence of objects) is a list.
print(type(genres_parsed[2]))
print(type(genres_parsed[1]))

<class 'str'>
<class 'dict'>
<class 'list'>


In [199]:
# For a pure literal, literal_eval and eval produce the same kind of object.
# NOTE: eval executes arbitrary code; it is acceptable here only because the
# input is a hard-coded literal, never external data.
expression = "['a',2]"
output = literal_eval(expression)
for shown in (output, type(output), type(eval(expression))):
    print(shown)

['a', 2]
<class 'list'>
<class 'list'>


In [197]:
# eval resolves names and operators, so the expression can use the variable `a`.
a = 2
exp = "a + 2"
print(exp)
evaluated = eval(exp)
print(evaluated)
print(type(evaluated))
# literal_eval(exp) gives an error here: it accepts literals only, not names or operators.


a + 2
4
<class 'int'>
