# Converting data types

In [1]:
import pandas as pd

In [3]:
# load weather.csv from the working directory into a pandas DataFrame (a 2D table);
# every column dtype is left for read_csv to infer
weather = pd.read_csv('weather.csv')

In [6]:
# print a summary of the DataFrame: index range, each column's non-null count
# and dtype, a tally of dtypes, and the memory usage
weather.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   date              5 non-null      object 
 1   temperature_high  5 non-null      float64
 2   temperature_low   5 non-null      object 
 3   rained            5 non-null      int64  
 4   snowed            5 non-null      bool   
 5   overcast          5 non-null      object 
 6   comments          5 non-null      object 
dtypes: bool(1), float64(1), int64(1), object(4)
memory usage: 373.0+ bytes


In [8]:
# list the column labels of the DataFrame
weather.columns

Index(['date', 'temperature_high', 'temperature_low', 'rained', 'snowed',
       'overcast', 'comments'],
      dtype='object')

In [10]:
# preview up to the first 10 rows; the frame only has 5, so every row is displayed
weather.head(10)

Unnamed: 0,date,temperature_high,temperature_low,rained,snowed,overcast,comments
0,2021-01-01,4.0,1,1,False,cloudy,happy new year
1,2021-01-02,11.0,2,0,False,sunny,second day
2,2021-01-03,3.0,2,0,False,foggy,third day
3,2021-01-04,6.0,2,0,False,sunny,first business day
4,2021-01-05,4.0,unknown,0,False,cloudy,second business day


# Convert the default data types inferred by pandas

In [13]:
# dtype of each column as inferred by read_csv, before any conversion
weather.dtypes

date                 object
temperature_high    float64
temperature_low      object
rained                int64
snowed                 bool
overcast             object
comments             object
dtype: object

In [16]:
# cast the float64 'temperature_high' values to int8 and store the result
# back in the same column
weather['temperature_high'] = weather['temperature_high'].astype('int8')

In [17]:
# confirm that 'temperature_high' is now int8
weather.dtypes

date                object
temperature_high      int8
temperature_low     object
rained               int64
snowed                bool
overcast            object
comments            object
dtype: object

In [18]:
# cast the int64 'rained' column to bool and write it back
# (0 becomes False, any non-zero value becomes True)
weather['rained'] = weather['rained'].astype('bool')

In [20]:
# confirm that 'rained' is now bool
weather.dtypes

date                object
temperature_high      int8
temperature_low     object
rained                bool
snowed                bool
overcast            object
comments            object
dtype: object

In [21]:
# inspect the converted values: the single 1 became True, the 0s became False
weather['rained']

0     True
1    False
2    False
3    False
4    False
Name: rained, dtype: bool

In [24]:
# astype also accepts a {column: dtype} mapping, so several columns can be
# converted in a single call; the result is a new DataFrame bound back to `weather`
weather = weather.astype({
    'overcast': 'category',
    'comments': 'string',
})

In [25]:
# summary after the conversions: 'overcast' is category and 'comments' is string
weather.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   date              5 non-null      object  
 1   temperature_high  5 non-null      int8    
 2   temperature_low   5 non-null      object  
 3   rained            5 non-null      bool    
 4   snowed            5 non-null      bool    
 5   overcast          5 non-null      category
 6   comments          5 non-null      string  
dtypes: bool(2), category(1), int8(1), object(2), string(1)
memory usage: 400.0+ bytes


In [26]:
# inspect the categorical column; the repr also lists the distinct categories
weather['overcast']

0    cloudy
1     sunny
2     foggy
3     sunny
4    cloudy
Name: overcast, dtype: category
Categories (3, object): ['cloudy', 'foggy', 'sunny']

In [27]:
# inspect the column now stored with the dedicated string dtype
weather['comments']

0         happy new year
1             second day
2              third day
3     first business day
4    second business day
Name: comments, dtype: string

In [28]:
# dtypes so far: 'date' and 'temperature_low' are still object
weather.dtypes

date                  object
temperature_high        int8
temperature_low       object
rained                  bool
snowed                  bool
overcast            category
comments              string
dtype: object

In [29]:
# 'temperature_low' was read as object because one entry is the text 'unknown';
# errors='coerce' turns anything that cannot be parsed as a number into NaN
weather['temperature_low'] = pd.to_numeric(
    weather['temperature_low'],
    errors='coerce',
)

In [30]:
# inspect the result: the column is float64 and the unparseable entry is NaN
weather['temperature_low']

0    1.0
1    2.0
2    2.0
3    2.0
4    NaN
Name: temperature_low, dtype: float64

In [32]:
# parse the 'date' strings into datetime64[ns] values and overwrite the column.
# pd.to_datetime is used on its own: the unit-less astype('datetime64') cast is
# rejected by pandas >= 2.0 (TypeError) and was redundant with this call anyway.
weather['date'] = pd.to_datetime(weather['date'])

In [33]:
# inspect the parsed dates; the dtype is datetime64[ns]
weather['date']

0   2021-01-01
1   2021-01-02
2   2021-01-03
3   2021-01-04
4   2021-01-05
Name: date, dtype: datetime64[ns]

In [36]:
# final dtypes of the weather DataFrame; no object columns remain
weather.dtypes

date                datetime64[ns]
temperature_high              int8
temperature_low            float64
rained                        bool
snowed                        bool
overcast                  category
comments                    string
dtype: object

# Convert the superhero datasets

In [37]:
# load the superhero datasets with inferred dtypes: the powers table comes from
# a CSV file, the hero details from two sheets of the same Excel workbook
hero_powers = pd.read_csv('superhero_powers.csv')
hero_dc = pd.read_excel('superhero_info.xlsx', sheet_name='DC Comics')
# previously misspelled as 'her_marvel'; named hero_marvel to match hero_dc
# and the name used when the sheet is re-read with explicit dtypes
hero_marvel = pd.read_excel('superhero_info.xlsx', sheet_name='Marvel Comics')

In [38]:
# summary of the powers table: one object column plus 167 bool columns
hero_powers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 667 entries, 0 to 666
Columns: 168 entries, hero_names to Omniscient
dtypes: bool(167), object(1)
memory usage: 114.1+ KB


In [39]:
# convert the object 'hero_names' column to the dedicated string dtype in place
hero_powers['hero_names'] = hero_powers['hero_names'].astype('string')

In [40]:
# inspect the converted column; the dtype is string
hero_powers['hero_names']

0              3-D Man
1               A-Bomb
2           Abe Sapien
3             Abin Sur
4          Abomination
            ...       
662    Yellowjacket II
663               Ymir
664               Yoda
665            Zatanna
666               Zoom
Name: hero_names, Length: 667, dtype: string

In [42]:
# reload the powers CSV, this time setting 'hero_names' to the string dtype at
# read time through read_csv's dtype mapping instead of converting afterwards
hero_powers = pd.read_csv(
    'superhero_powers.csv',
    dtype={'hero_names': 'string'},
)

In [43]:
# confirm that 'hero_names' was read as string; the power columns stay bool
hero_powers.dtypes

hero_names               string
Agility                    bool
Accelerated Healing        bool
Lantern Power Ring         bool
Dimensional Awareness      bool
                          ...  
Phoenix Force              bool
Molecular Dissipation      bool
Vision - Cryo              bool
Omnipresent                bool
Omniscient                 bool
Length: 168, dtype: object

In [44]:
# preview the first five rows (head's default) of the powers table
hero_powers.head()

Unnamed: 0,hero_names,Agility,Accelerated Healing,Lantern Power Ring,Dimensional Awareness,Cold Resistance,Durability,Stealth,Energy Absorption,Flight,...,Web Creation,Reality Warping,Odin Force,Symbiote Costume,Speed Force,Phoenix Force,Molecular Dissipation,Vision - Cryo,Omnipresent,Omniscient
0,3-D Man,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,A-Bomb,False,True,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,Abe Sapien,True,True,False,False,True,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,Abin Sur,False,False,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,Abomination,False,True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [45]:
# dtypes of the DC sheet as inferred by read_excel: text columns come back as object
hero_dc.dtypes

name           object
Gender         object
Eye color      object
Race           object
Hair color     object
Height          int64
Publisher      object
Alignment      object
Weight        float64
dtype: object

In [47]:
# column -> dtype mapping to pass to read_excel for the superhero sheets.
# Columns not listed here (e.g. Height, Weight) keep their inferred dtypes.
hero_dtype = {
    'name': 'string',
    'Gender': 'category',
    'Eye color': 'string',
    'Race': 'string',
    'Hair color': 'string',
    'Publisher': 'string',
    'Alignment': 'category',
}

In [49]:
# re-read both sheets of the workbook, applying the shared hero_dtype mapping
# at load time so the text columns arrive as string/category instead of object
hero_dc = pd.read_excel(
    'superhero_info.xlsx',
    sheet_name='DC Comics',
    dtype=hero_dtype,
)
hero_marvel = pd.read_excel(
    'superhero_info.xlsx',
    sheet_name='Marvel Comics',
    dtype=hero_dtype,
)

In [50]:
# confirm the mapping was applied; Height and Weight keep their inferred numeric dtypes
hero_dc.dtypes

name            string
Gender        category
Eye color       string
Race            string
Hair color      string
Height           int64
Publisher       string
Alignment     category
Weight         float64
dtype: object