In [24]:
import pandas as pd

In [25]:
# Load the CSV with pandas' defaults: the first line of the file is taken
# as the header row.
df = pd.read_csv(filepath_or_buffer="data/stock_data.csv")

In [26]:
df.head()

Unnamed: 0,Datasource: Google finance,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,tickers,eps,revenue,price,people
1,GOOGL,27.82,87,845,larry page
2,WMT,4.61,484,65,n.a.
3,MSFT,-1,85,64,bill gates
4,RIL,not available,50,1023,mukesh ambani


In [27]:
# Skip the first line of the file so that the following line is used as
# the header row.
df = pd.read_csv(
    filepath_or_buffer="data/stock_data.csv",
    skiprows=1,
)

In [28]:
df.head()

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


In [29]:
# Alternative to skiprows: state which row holds the column names.
# header=1 means the row at index 1 (the second line of the file) is the
# header; data starts on the line after it.
df = pd.read_csv(
    filepath_or_buffer="data/stock_data.csv",
    header=1,
)

In [30]:
df.head()

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


In [31]:
# Replace the file's column names with our own. header=1 still marks the
# file's header row, so that row is consumed rather than read as data.
# NOTE(review): "app" labels the column the file itself calls "eps" --
# confirm the name is intentional.
df = pd.read_csv(
    filepath_or_buffer="data/stock_data.csv",
    header=1,
    names=["stock_symbol", "app", "revenue", "price", "people"],
)

In [32]:
df.head()

Unnamed: 0,stock_symbol,app,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


In [33]:
# Read only the first 4 data rows; the header row is not counted in nrows.
df = pd.read_csv(
    filepath_or_buffer="data/stock_data.csv",
    header=1,
    nrows=4,
)

In [34]:
df.head()

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani


In [36]:
# Per-column missing-value markers: 'not available' becomes NaN only in
# the eps column, and -1 becomes NaN only in the revenue column. The same
# values in any other column are left untouched.
df = pd.read_csv(
    filepath_or_buffer="data/stock_data.csv",
    header=1,
    na_values={
        'eps': ['not available'],
        'revenue': [-1],
    },
)

In [37]:
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845,larry page
1,WMT,4.61,484.0,65,n.a.
2,MSFT,-1.0,85.0,64,bill gates
3,RIL,,50.0,1023,mukesh ambani
4,TATA,5.6,,n.a.,ratan tata


In [39]:
# A flat list of missing-value markers applies to every column:
# 'not available' and -1 are read as NaN wherever they occur.
df = pd.read_csv(
    filepath_or_buffer="data/stock_data.csv",
    header=1,
    na_values=['not available', -1],
)

In [40]:
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845,larry page
1,WMT,4.61,484.0,65,n.a.
2,MSFT,,85.0,64,bill gates
3,RIL,,50.0,1023,mukesh ambani
4,TATA,5.6,,n.a.,ratan tata


In [42]:
# Vectorised column division instead of a row-wise apply/lambda: pandas
# divides element by element and yields NaN wherever either operand is
# missing, so no Python-level loop over rows is needed.
# NOTE(review): the column is named "pe" but is computed as revenue / eps;
# a price-to-earnings ratio is normally price / eps -- confirm the intent.
df['pe'] = df['revenue'] / df['eps']

In [43]:
df

Unnamed: 0,tickers,eps,revenue,price,people,pe
0,GOOGL,27.82,87.0,845,larry page,3.127247
1,WMT,4.61,484.0,65,n.a.,104.989154
2,MSFT,,85.0,64,bill gates,
3,RIL,,50.0,1023,mukesh ambani,
4,TATA,5.6,,n.a.,ratan tata,


In [44]:
# Write the frame to pe.csv; index=False leaves the row index out of the file.
df.to_csv(
    path_or_buf="pe.csv",
    index=False,
)

In [47]:
# Read the "movies" sheet of the workbook into a DataFrame.
df_movies = pd.read_excel(
    io="data/movies_db.xlsx",
    sheet_name="movies",
)

In [49]:
df_movies.head()

Unnamed: 0,movie_id,title,industry,release_year,imdb_rating,studio,language_id
0,101,K.G.F: Chapter 2,Bollywood,2022,8.4,Hombale Films,3
1,102,Doctor Strange in the Multiverse of Madness,Hollywood,2022,7.0,Marvel Studios,5
2,103,Thor: The Dark World,Hollywood,2013,6.8,Marvel Studios,5
3,104,Thor: Ragnarok,Hollywood,2017,7.9,Marvel Studios,5
4,105,Thor: Love and Thunder,Hollywood,2022,6.8,Marvel Studios,5


In [51]:
# Read the "financials" sheet exactly as stored (no cleaning applied yet)
# and preview the first rows.
df_financials = pd.read_excel(
    io="data/movies_db.xlsx",
    sheet_name="financials",
)
df_financials.head()

Unnamed: 0,movie_id,budget,revenue,unit,currency
0,101,1.0,12.5,Billions,INR
1,102,200.0,954.8,Millions,USD
2,103,165.0,644.8,Millions,$$
3,104,180.0,854.0,Millions,Dollars
4,105,250.0,670.0,Millions,USD


In [52]:
def standardize_currency(curr):
    """Map the known alternative spellings of US dollars onto "USD".

    Returns "USD" when ``curr`` equals '$$' or 'Dollars'; any other value
    is returned unchanged.
    """
    usd_aliases = ('$$', 'Dollars')
    return "USD" if curr in usd_aliases else curr

In [54]:
# Re-read the "financials" sheet, this time passing each value of the
# "currency" column through standardize_currency while parsing, then
# preview the first rows.
df_financials = pd.read_excel(
    io="data/movies_db.xlsx",
    sheet_name="financials",
    converters={'currency': standardize_currency},
)
df_financials.head()

Unnamed: 0,movie_id,budget,revenue,unit,currency
0,101,1.0,12.5,Billions,INR
1,102,200.0,954.8,Millions,USD
2,103,165.0,644.8,Millions,USD
3,104,180.0,854.0,Millions,USD
4,105,250.0,670.0,Millions,USD


In [57]:
# Inner join of movies and financials on movie_id: only ids present in
# both frames are kept.
# NOTE(review): validate="many_to_many" is accepted by pandas but performs
# no key-uniqueness check; if movie_id is expected to be unique on both
# sides, a stricter mode would catch duplicates -- confirm.
df_merged = df_movies.merge(
    df_financials,
    on="movie_id",
    how="inner",
    validate="many_to_many",
)
df_merged.head()

Unnamed: 0,movie_id,title,industry,release_year,imdb_rating,studio,language_id,budget,revenue,unit,currency
0,101,K.G.F: Chapter 2,Bollywood,2022,8.4,Hombale Films,3,1.0,12.5,Billions,INR
1,102,Doctor Strange in the Multiverse of Madness,Hollywood,2022,7.0,Marvel Studios,5,200.0,954.8,Millions,USD
2,103,Thor: The Dark World,Hollywood,2013,6.8,Marvel Studios,5,165.0,644.8,Millions,USD
3,104,Thor: Ragnarok,Hollywood,2017,7.9,Marvel Studios,5,180.0,854.0,Millions,USD
4,105,Thor: Love and Thunder,Hollywood,2022,6.8,Marvel Studios,5,250.0,670.0,Millions,USD


In [58]:
# Save the merged frame to a new workbook, in a sheet named "merged",
# without writing the row index.
df_merged.to_excel(
    excel_writer="movies_merged.xlsx",
    sheet_name="merged",
    index=False,
)