In [21]:

import pandas as pd


#Question 1

# names us
def df_names_us():
    """Load the US baby-name files into a single sorted DataFrame.

    Reads one ``yob<year>.txt`` file per year, from 1880 to 2020 inclusive,
    from the current working directory. Each file is parsed as a header-less
    CSV whose columns are name, gender, births.

    Returns:
        pandas.DataFrame: columns ``year``, ``name``, ``gender``, ``births``,
        sorted by year, gender, descending births and then name, indexed by
        a fresh 0..n-1 range.

    Raises:
        FileNotFoundError: if one of the yearly files is missing.
    """
    yearly_frames = []
    for year in range(1880, 2021):
        yearly = pd.read_csv(f'yob{year}.txt', names=['name', 'gender', 'births'])
        yearly.insert(0, 'year', year)
        yearly_frames.append(yearly)

    # Concatenate once: growing a frame with DataFrame.append inside the loop
    # re-copies all previous rows at every iteration, and DataFrame.append was
    # removed in pandas 2.0.
    combined = pd.concat(yearly_frames, ignore_index=True)

    # sort_values returns a new frame, so the result has to be kept.
    combined = combined.sort_values(
        by=['year', 'gender', 'births', 'name'],
        ascending=[True, True, False, True],
    )
    return combined.reset_index(drop=True)

# Build the US names frame; as the last expression of the cell it is rendered
# as the cell output.
df_names_us()


Unnamed: 0,year,name,gender,births
0,1880,Mary,F,7065
1,1880,Anna,F,2604
2,1880,Emma,F,2003
3,1880,Elizabeth,F,1939
4,1880,Minnie,F,1746
...,...,...,...,...
2020858,2020,Zykell,M,5
2020859,2020,Zylus,M,5
2020860,2020,Zymari,M,5
2020861,2020,Zyn,M,5


In [23]:
# names fr
def df_names_fr():
    """Load the French first-name file ``nat2020.csv`` into a cleaned DataFrame.

    The file is read from the current working directory as a ``;``-separated
    CSV whose columns are, in order: gender code, first name, year, births.
    Every field is read as text first so that placeholder values cannot
    produce mixed-type columns.

    Cleaning steps:
      * rows whose year is not made of digits only are dropped (this removes
        the ``XXXX`` placeholder years and the header line when present);
      * the ``_PRENOMS_RARES`` bucket and names shorter than two characters
        are dropped;
      * gender code ``'1'`` becomes ``'M'``, any other code becomes ``'F'``;
      * names are normalised with ``str.capitalize``.

    Returns:
        pandas.DataFrame: columns ``year`` (int64), ``name``, ``gender``,
        ``births`` (int64), sorted by year, gender, descending births and
        then name, indexed by a fresh 0..n-1 range.

    Raises:
        FileNotFoundError: if ``nat2020.csv`` is missing.
        ValueError: if the births value of a kept row is not an integer.
    """
    raw = pd.read_csv(
        'nat2020.csv',
        sep=';',
        header=None,  # a header line, if any, is filtered out with the bad years
        names=['gender', 'name', 'year', 'births'],
        dtype=str,
        keep_default_na=False,  # keep every name verbatim, even NA-like tokens
    )

    keep = (
        raw['year'].str.isdigit()
        & (raw['name'].str.len() >= 2)
        & (raw['name'] != '_PRENOMS_RARES')
    )
    cleaned = raw.loc[keep, ['year', 'name', 'gender', 'births']].copy()

    # Convert only after filtering, and let bad values raise instead of
    # silently leaving the column as text.
    cleaned['year'] = cleaned['year'].astype('int64')
    cleaned['births'] = cleaned['births'].astype('int64')
    cleaned['gender'] = cleaned['gender'].eq('1').map({True: 'M', False: 'F'})
    cleaned['name'] = cleaned['name'].str.capitalize()

    cleaned = cleaned.sort_values(
        by=['year', 'gender', 'births', 'name'],
        ascending=[True, True, False, True],
    )
    return cleaned.reset_index(drop=True)

# Build the French names frame; as the last expression of the cell it is
# rendered as the cell output.
df_names_fr()

  if (await self.run_code(code, result,  async_=asy)):


Unnamed: 0,year,name,gender,births
0,1900,Marie,F,48713
1,1900,Jeanne,F,13981
2,1900,Marguerite,F,8058
3,1900,Germaine,F,6981
4,1900,Louise,F,6696
...,...,...,...,...
630403,2020,Zekeriya,M,3
630404,2020,Zeynel,M,3
630405,2020,Zeïd,M,3
630406,2020,Ziade,M,3


In [17]:
# taux de change
def df_taux_change(devises):
    """Load daily exchange rates for the requested currencies.

    Reads ``Webstat_Export_20211006.csv`` (``;``-separated) from the current
    working directory. Line 1 of the file is used as the header, lines 0 and
    2-5 are skipped. The series named ``EXR.D.<code>.EUR.SP00.A`` is selected
    for every currency code in ``devises``.

    Args:
        devises: iterable of currency codes, e.g. ``['CHF', 'GBP', 'USD']``.

    Returns:
        pandas.DataFrame: one float column per currency, in the order given
        by ``devises``, indexed by a ``DatetimeIndex`` named ``Date``. Rows
        in which any requested currency has no numeric value are dropped.

    Raises:
        FileNotFoundError: if the export file is missing.
        ValueError: if a requested series is not a column of the file.
    """
    devises = list(devises)
    series_to_devise = {f'EXR.D.{devise}.EUR.SP00.A': devise for devise in devises}

    raw = pd.read_csv(
        'Webstat_Export_20211006.csv',
        sep=';',
        skiprows=[0, 2, 3, 4, 5],
        header=0,
        usecols=['Code série :'] + list(series_to_devise),
    )
    raw = raw.rename(columns={'Code série :': 'Date', **series_to_devise})

    # Parse the whole column at once instead of one value at a time.
    raw['Date'] = pd.to_datetime(raw['Date'], dayfirst=True)

    # usecols keeps the file's column order; reselect to honour the caller's.
    rates = raw.set_index('Date')[devises]

    # Decimal comma -> decimal point. astype(str) keeps this safe for a column
    # that pandas already parsed as numeric; non-numeric markers become NaN.
    rates = rates.apply(
        lambda column: pd.to_numeric(
            column.astype(str).str.replace(',', '.', regex=False),
            errors='coerce',
        )
    )
    return rates.dropna()

# Build the CHF/GBP/USD rates frame; as the last expression of the cell it is
# rendered as the cell output.
df_taux_change(['CHF', 'GBP', 'USD'])

Unnamed: 0_level_0,CHF,GBP,USD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-10-06,1.0715,0.84970,1.1542
2021-10-05,1.0752,0.85173,1.1602
2021-10-04,1.0768,0.85530,1.1636
2021-10-01,1.0791,0.85653,1.1600
2021-09-30,1.0830,0.86053,1.1579
...,...,...,...
1999-01-22,1.5983,0.70050,1.1567
1999-01-21,1.6030,0.70230,1.1572
1999-01-20,1.6024,0.70140,1.1575
1999-01-19,1.6001,0.70000,1.1616


In [22]:
import unittest

class Lesson4Tests(unittest.TestCase):
    def test_df_names_us(self):
        df = df_names_us()
        # colonnes
        self.assertEqual(list(df.columns), ['year', 'name', 'gender', 'births'])
        # lignes
        self.assertEqual(len(df), 2020863)
        # index
        self.assertTrue(isinstance(df.index, pd.core.indexes.range.RangeIndex))
        # test NaN
        self.assertTrue(df.loc[df.isnull().any(axis=1)].empty)
        
    def test_df_names_fr(self):
        df = df_names_fr()
        # colonnes
        self.assertEqual(list(df.columns), ['year', 'name', 'gender', 'births'])
        # lignes
        self.assertEqual(len(df), 630407)
        # index
        self.assertTrue(isinstance(df.index, pd.core.indexes.range.RangeIndex))
        # test names
        self.assertTrue(df.loc[df['name'].str.contains('^[A-Z]+(?:-[A-Z]+)?$')].empty)
        # test gender
        self.assertEqual(len(df), len(df.loc[df['gender']=='F']) + len(df.loc[df['gender']=='M']))
        # test NaN
        self.assertTrue(df.loc[df.isnull().any(axis=1)].empty)
    
    def test_df_taux_change(self):
        df = df_taux_change(['CHF', 'GBP', 'USD'])
        # colonnes
        self.assertEqual(list(df.columns), ['CHF', 'GBP', 'USD'])
        # index
        self.assertTrue(isinstance(df.index, pd.core.indexes.datetimes.DatetimeIndex))
        # types taux
        self.assertTrue((df.dtypes == 'float').all())
        # test NaN
        self.assertTrue(df.loc[df.isnull().any(axis=1)].empty)
    
def run_tests():
    """Run every test of ``Lesson4Tests`` and print a verbose report.

    The report (one line per test, then a summary) is written by
    ``unittest.TextTestRunner``. Nothing is returned.
    """
    # unittest.makeSuite is deprecated since Python 3.11 and removed in 3.13;
    # TestLoader.loadTestsFromTestCase is the supported equivalent.
    test_suite = unittest.TestLoader().loadTestsFromTestCase(Lesson4Tests)
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(test_suite)

    
# Run the test suite; the runner's report appears as the cell output.
run_tests()

test_df_names_fr (__main__.Lesson4Tests) ... ERROR
test_df_names_us (__main__.Lesson4Tests) ... ok
test_df_taux_change (__main__.Lesson4Tests) ... ok

ERROR: test_df_names_fr (__main__.Lesson4Tests)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "<ipython-input-22-850fb96dd966>", line 16, in test_df_names_fr
    df = df_names_fr()
NameError: name 'df_names_fr' is not defined

----------------------------------------------------------------------
Ran 3 tests in 6.704s

FAILED (errors=1)
