In [1]:
import pandas as pd

In [2]:
# Load the planets table from a CSV file; the path is relative to the
# notebook's working directory.
planets = pd.read_csv('Fontes/planets.csv')

In [3]:
# Peek at the top three rows of the table.
planets.iloc[:3]

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011


In [4]:
# Inspect the dtype pandas inferred for each column.
planets.dtypes

method             object
number              int64
orbital_period    float64
mass              float64
distance          float64
year                int64
dtype: object

In [5]:
# Column-wise mean; numeric_only=True leaves out the non-numeric 'method' column.
planets.mean(numeric_only=True)

number               1.785507
orbital_period    2002.917596
mass                 2.638161
distance           264.069282
year              2009.070531
dtype: float64

In [6]:
# Planet count divided by mass for the first record, read with a single
# label-based scalar lookup per value instead of chained indexing.
planets.loc[0, 'number'] / planets.loc[0, 'mass']

0.14084507042253522

In [7]:
# Cast the first record's integer planet count to a float scalar.
planets.loc[0, 'number'].astype(float)

1.0

In [8]:
# Cast the first record's float mass to an integer scalar (the fractional
# part is dropped, 7.1 -> 7).
planets.loc[0, 'mass'].astype(int)

7

In [9]:
# Cast the first record's integer year to its string form.
planets.loc[0, 'year'].astype(str)

'2006'

In [10]:
# Parse the integer year with an explicit '%Y' format; each value becomes a
# timestamp on January 1 of that year, stored in a new 'year_dt' column.
planets['year_dt'] = pd.to_datetime(planets['year'], format='%Y')
planets['year_dt']

0      2006-01-01
1      2008-01-01
2      2011-01-01
3      2007-01-01
4      2009-01-01
          ...    
1030   2006-01-01
1031   2007-01-01
1032   2007-01-01
1033   2008-01-01
1034   2008-01-01
Name: year_dt, Length: 1035, dtype: datetime64[ns]

In [13]:
# Deliberately messy sample names: mixed casing and one semicolon separator.
names = pd.Series(
    data=[
        'Pomweray, CODY',
        'Wagner; Jarry',
        'smith, Ray',
    ],
)

In [14]:
# Swap the stray semicolon separator for a comma. regex=False makes this a
# plain literal replacement, so the result does not depend on the default
# value of `regex` in the installed pandas version.
names = names.str.replace(';', ',', regex=False)
names

0    Pomweray, CODY
1     Wagner, Jarry
2        smith, Ray
dtype: object

In [15]:
# Character count of each name (spaces and punctuation included).
names.str.len()

0    14
1    13
2    10
dtype: int64

In [16]:
# Trim leading/trailing whitespace from each name. The sample values carry no
# padding, so the displayed result is unchanged here.
names = names.str.strip()
names

0    Pomweray, CODY
1     Wagner, Jarry
2        smith, Ray
dtype: object

In [17]:
# Normalise every name to lower case.
names = names.str.lower()
names

0    pomweray, cody
1     wagner, jarry
2        smith, ray
dtype: object

In [18]:
# Replace the names with their upper-case form.
names = names.str.upper()
names

0    POMWERAY, CODY
1     WAGNER, JARRY
2        SMITH, RAY
dtype: object

In [19]:
# Split each 'LAST, FIRST' string on the comma-plus-space separator; every
# element of `names` becomes a two-item list.
names = names.str.split(', ')
names

0    [POMWERAY, CODY]
1     [WAGNER, JARRY]
2        [SMITH, RAY]
dtype: object

In [20]:
# Flip each [LAST, FIRST] pair into [FIRST, LAST] and rebuild the Series.
flipped_pairs = []
for name_parts in names:
    flipped_pairs.append(name_parts[::-1])
names = pd.Series(flipped_pairs)
names

0    [CODY, POMWERAY]
1     [JARRY, WAGNER]
2        [RAY, SMITH]
dtype: object

In [21]:
# Glue each [FIRST, LAST] pair into a single 'FIRST LAST' string.
# Note: from here on `names` is a plain Python list, not a Series.
names = list(map(' '.join, names))
names

['CODY POMWERAY', 'JARRY WAGNER', 'RAY SMITH']

In [22]:
# Four sampling periods spaced 30 days apart, starting on 1 Jan 2020.
# Upper-case 'D' is the canonical day alias; the lower-case spelling is
# deprecated in recent pandas releases.
daterange = pd.period_range('1/1/2020', freq='30D', periods=4)

In [23]:
# Wrap the period range in a one-column frame named 'sample date'.
date_df = pd.DataFrame({'sample date': daterange})
date_df

Unnamed: 0,sample date
0,2020-01-01
1,2020-01-31
2,2020-03-01
3,2020-03-31


In [25]:
# Row-to-row difference of the period column. The result is stored as an
# object-dtype column of offset objects (displayed as `<30 * Days>`), and the
# first row is NaT because it has no predecessor.
date_df['date difference'] = date_df['sample date'].diff(periods=1)
date_df

  new_data = np.array([self.freq.base * x for x in new_i8_data])


Unnamed: 0,sample date,date difference
0,2020-01-01,NaT
1,2020-01-31,<30 * Days>
2,2020-03-01,<30 * Days>
3,2020-03-31,<30 * Days>


In [26]:
# Cast the underlying values to month resolution so each sample date is
# floored to the first day of its month.
date_df['first of month'] = date_df['sample date'].values.astype('datetime64[M]')
date_df

Unnamed: 0,sample date,date difference,first of month
0,2020-01-01,NaT,2020-01-01
1,2020-01-31,<30 * Days>,2020-01-01
2,2020-03-01,<30 * Days>,2020-03-01
3,2020-03-31,<30 * Days>,2020-03-01


In [27]:
# List the dtype of every column in date_df.
date_df.dtypes

sample date          period[30D]
date difference           object
first of month     datetime64[s]
dtype: object

In [28]:
# Convert the period column to timestamps so it can take part in datetime
# arithmetic, then re-check the dtypes.
date_df['sample date'] = date_df['sample date'].dt.to_timestamp()
date_df.dtypes

sample date        datetime64[ns]
date difference            object
first of month      datetime64[s]
dtype: object

In [29]:
# Elapsed time between each sample date and the first day of its month.
date_df['sample date'] - date_df['first of month']

0    0 days
1   30 days
2    0 days
3   30 days
dtype: timedelta64[ns]

In [30]:
# Subtract the object-dtype offsets from the timestamps element by element;
# the result comes back as an object column (NaT where the offset is missing).
date_df['sample date'] - date_df['date difference']

  date_df['sample date'] - date_df['date difference']


0                    NaT
1    2020-01-01 00:00:00
2    2020-01-31 00:00:00
3    2020-03-01 00:00:00
dtype: object

In [31]:
# Shift every sample date back by 30 days. The keyword form avoids parsing a
# unit string (the lower-case 'd' unit is deprecated in recent pandas).
date_df['sample date'] - pd.Timedelta(days=30)

0   2019-12-02
1   2020-01-01
2   2020-01-31
3   2020-03-01
Name: sample date, dtype: datetime64[ns]

In [32]:
# Weekday name for each sample date.
date_df['sample date'].dt.day_name()

0    Wednesday
1       Friday
2       Sunday
3      Tuesday
Name: sample date, dtype: object

In [38]:
# Small temperature log; row 4 is missing both its measurement type and its
# reading so the missing-data tools below have something to act on.
temps = pd.DataFrame(
    dict(
        sequence=list(range(1, 6)),
        measurement_type=['actual'] * 3 + [None, 'estimated'],
        temperature_f=[67.24, 84.56, 91.61, None, 19.64],
    )
)
temps

Unnamed: 0,sequence,measurement_type,temperature_f
0,1,actual,67.24
1,2,actual,84.56
2,3,actual,91.61
3,4,,
4,5,estimated,19.64


In [39]:
# Boolean mask flagging the missing value in every cell.
temps.isna()

Unnamed: 0,sequence,measurement_type,temperature_f
0,False,False,False
1,False,False,False
2,False,False,False
3,False,True,True
4,False,False,False


In [40]:
# Running total with the default NaN handling: the missing row shows NaN but
# the accumulation carries on afterwards.
temps['temperature_f'].cumsum()

0     67.24
1    151.80
2    243.41
3       NaN
4    263.05
Name: temperature_f, dtype: float64

In [41]:
# With skipna=False the first NaN propagates to every later row.
temps['temperature_f'].cumsum(skipna=False)

0     67.24
1    151.80
2    243.41
3       NaN
4       NaN
Name: temperature_f, dtype: float64

In [43]:
# Per-type maximum of every other column; the row whose measurement type is
# missing does not form a group.
temps.groupby('measurement_type').max()

Unnamed: 0_level_0,sequence,temperature_f
measurement_type,Unnamed: 1_level_1,Unnamed: 2_level_1
actual,3,91.61
estimated,5,19.64


In [44]:
# Drop every row that contains at least one missing value.
temps.dropna()

Unnamed: 0,sequence,measurement_type,temperature_f
0,1,actual,67.24
1,2,actual,84.56
2,3,actual,91.61
4,5,estimated,19.64


In [45]:
# axis=1 drops columns instead: any column holding a missing value is removed.
temps.dropna(axis=1)

Unnamed: 0,sequence
0,1
1,2
2,3
3,4
4,5


In [46]:
# Replace every missing value with 0, in the text and numeric columns alike.
temps.fillna(0)

Unnamed: 0,sequence,measurement_type,temperature_f
0,1,actual,67.24
1,2,actual,84.56
2,3,actual,91.61
3,4,0,0.0
4,5,estimated,19.64


In [47]:
# Forward-fill: each missing value takes the last valid value above it.
# ffill() is the supported spelling; fillna(method='pad') is deprecated and
# emits a warning.
temps.ffill()

  temps.fillna(method='pad')


Unnamed: 0,sequence,measurement_type,temperature_f
0,1,actual,67.24
1,2,actual,84.56
2,3,actual,91.61
3,4,actual,91.61
4,5,estimated,19.64


In [48]:
# Linearly interpolate the missing temperature reading only. Calling
# interpolate() on the whole frame also touches the object-dtype
# 'measurement_type' column, which is deprecated and triggers a warning;
# that text column is left as it is here.
temps.assign(temperature_f=temps['temperature_f'].interpolate())

  temps.interpolate()


Unnamed: 0,sequence,measurement_type,temperature_f
0,1,actual,67.24
1,2,actual,84.56
2,3,actual,91.61
3,4,,55.625
4,5,estimated,19.64


In [49]:
# Revenue and cost per region for two teams: four regions, each listed once
# per team.
df = pd.DataFrame(
    dict(
        Region=['North', 'West', 'East', 'South'] * 2,
        Team=['One'] * 4 + ['Two'] * 4,
        Revenue=[7500, 5000, 2750, 6400, 2300, 3750, 1900, 575],
        Cost=[5200, 5100, 4400, 5300, 1250, 1300, 2100, 50],
    )
)

In [50]:
# Label each row 'Profit' when revenue exceeds cost and 'Loss' otherwise,
# by translating the boolean comparison through a lookup.
made_money = df['Revenue'] > df['Cost']
df['Profit'] = made_money.map({True: 'Profit', False: 'Loss'})
df

Unnamed: 0,Region,Team,Revenue,Cost,Profit
0,North,One,7500,5200,Profit
1,West,One,5000,5100,Loss
2,East,One,2750,4400,Loss
3,South,One,6400,5300,Profit
4,North,Two,2300,1250,Profit
5,West,Two,3750,1300,Profit
6,East,Two,1900,2100,Loss
7,South,Two,575,50,Profit


In [51]:
# Lookup from team label to team colour.
team_map = dict(One='Red', Two='Blue')

In [52]:
# Translate each team label to its colour through the team_map lookup.
df['Team_Color'] = df['Team'].map(team_map)
df

Unnamed: 0,Region,Team,Revenue,Cost,Profit,Team_Color
0,North,One,7500,5200,Profit,Red
1,West,One,5000,5100,Loss,Red
2,East,One,2750,4400,Loss,Red
3,South,One,6400,5300,Profit,Red
4,North,Two,2300,1250,Profit,Blue
5,West,Two,3750,1300,Profit,Blue
6,East,Two,1900,2100,Loss,Blue
7,South,Two,575,50,Profit,Blue


In [53]:
# Length of every cell's string representation. DataFrame.map is the
# element-wise method that replaces the deprecated DataFrame.applymap
# (it requires pandas 2.1 or newer).
df.map(lambda value: len(str(value)))

  df.applymap(lambda x: len(str(x)))


Unnamed: 0,Region,Team,Revenue,Cost,Profit,Team_Color
0,5,3,4,4,6,3
1,4,3,4,4,4,3
2,4,3,4,4,4,3
3,5,3,4,4,6,3
4,5,3,4,4,6,4
5,4,3,4,4,6,4
6,4,3,4,4,4,4
7,5,3,3,2,6,4


In [55]:
# Each row's revenue as a fraction of its region's total revenue.
# transform('sum') broadcasts the per-region total back onto every row, so
# the share is computed in one vectorized pass. This avoids re-filtering the
# frame once per row and does not assume the index runs 0..n-1.
region_totals = df.groupby('Region')['Revenue'].transform('sum')
new_col = (df['Revenue'] / region_totals).tolist()

In [56]:
# Attach the computed shares as a new column, then display the rows ordered
# by region.
df['Revenue Share of Region'] = new_col
df.sort_values(by='Region')

Unnamed: 0,Region,Team,Revenue,Cost,Profit,Team_Color,Revenue Share of Region
2,East,One,2750,4400,Loss,Red,0.591398
6,East,Two,1900,2100,Loss,Blue,0.408602
0,North,One,7500,5200,Profit,Red,0.765306
4,North,Two,2300,1250,Profit,Blue,0.234694
3,South,One,6400,5300,Profit,Red,0.917563
7,South,Two,575,50,Profit,Blue,0.082437
1,West,One,5000,5100,Loss,Red,0.571429
5,West,Two,3750,1300,Profit,Blue,0.428571
