# Create CSV for temperature per cities per year

## Import

In [1]:
import sys
import numba 
import numpy as np
from numba import cuda
import pandas as pd
import time

t=time.time()

print("Python version:", sys.version)
print("Numba version:", numba.__version__)
print("Numpy version:", np.__version__)

Python version: 3.9.13 (main, Aug 25 2022, 23:51:50) [MSC v.1916 64 bit (AMD64)]
Numba version: 0.55.1
Numpy version: 1.21.5


## Load datas

In [2]:
data_cities=pd.read_csv("GlobalLandTemperaturesByCity.csv")
full_city=data_cities[['dt', 'City', 'AverageTemperature']].dropna()
full_city

Unnamed: 0,dt,City,AverageTemperature
0,1743-11-01,Århus,6.068
5,1744-04-01,Århus,5.788
6,1744-05-01,Århus,10.644
7,1744-06-01,Århus,14.051
8,1744-07-01,Århus,16.082
...,...,...,...
8599206,2013-04-01,Zwolle,7.710
8599207,2013-05-01,Zwolle,11.464
8599208,2013-06-01,Zwolle,15.043
8599209,2013-07-01,Zwolle,18.775


## Mean Temperature per Country per year

In [3]:
years=[str(i) for i in range(1743,2014)]

In [4]:
template=pd.DataFrame([])
template['name']=full_city['City'].unique()

In [5]:
def getfirst(l):
    if len(l)!=0:
        return l[0]

In [6]:
@cuda.jit
def compute_mean(array):
    res=0
    for x in range(len(array)):
        res+=array[x]
        
    if len(array)>5: #at least 5 month of data to consider the year as valid
        array[0]=res/len(array)
    else:
        array[0]=-666.0

In [7]:
dict_list_years={}

for year in years:
    by_year=full_city[full_city['dt'].apply(lambda x:x[:4])==year].dropna()
    temp_list=[]
    block_size=32
    grid_size=(by_year.shape[0]+block_size-1)//block_size+1
    if not(np.any(by_year)):
        temp_list=[None]*3448
    else:
        for city in template['name']:
            by_city=np.array(by_year[by_year['City'].apply(lambda x:x==city)]['AverageTemperature'], dtype=np.float32)
            if not(np.any(by_city)):
                temp_list.append(None)
            else:
                by_city_gpu=cuda.to_device(by_city)
                compute_mean[grid_size, block_size](by_city_gpu)
                by_city_gpu.copy_to_host(by_city)
                by_city[by_city ==-666.0] = None
                temp_list.append(by_city[0])
    dict_list_years[year]=temp_list
    print(year)


1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942


In [8]:
print(len(dict_list_years['1745']))

3448


## Mean per country Calculation

In [31]:
mean_per_city=[]
for i in range(len(template['name'])):
    list_c=[]
    for elem in dict_list_years:
        list_c.append(dict_list_years[elem][i])
        
    arr=np.array(list_c)  
    new_array = arr[arr != np.array(None)]
    
    mean_per_city.append(np.nanmean(new_array)) 
    
print(mean_per_city)

[8.435222300043646, 14.16312785958344, 10.935705, 2.9681935217714064, 7.934451463926641, 13.589346828100815, 9.601663296177701, 8.435222300043646, 26.617322294679408, 26.166201023697162, 26.617322294679408, 1.3055289195889095, 7.9675374, 26.390658815445438, 26.314755643647292, 8.621439818616183, 24.955542, 26.11866778055827, 13.600138, 17.950651, 25.865728, 27.045270365637702, 27.329767, 26.197680506212958, 26.270006595958364, 27.195694, 26.247291411329435, 26.413112776620046, 4.082243050935973, 0.6548463774536172, 20.39728024396947, 19.488308, 17.54793, 15.50445, 25.915215, 27.712083235241117, 26.046174675036387, 27.58732713063558, 12.150659, 18.159070798488848, 25.558362555953693, 26.154071563329452, 13.600138, 26.189872302261055, 18.555952, 27.177109, 25.0295413062686, 26.66343, 25.716688, 12.164883215922229, 23.44956977412386, 20.475315, 26.296860244143662, 15.682358, 13.600138, 12.198375, 27.05405122666132, 10.76903202506526, 12.231572318280865, 11.607932018589329, 12.431245205014

## Mean deviation calculation and csv writing

In [32]:
for year in years:
    print(year)
    data_year=template
    data_year['Temp']=dict_list_years[year]
    
    list_mean_dev=[]
    for i in range(len(dict_list_years[year])):
        if not((data_year['Temp'][i]==None) or (mean_per_city[i]==None)):
            list_mean_dev.append(data_year['Temp'][i]-mean_per_city[i])
        else:
            list_mean_dev.append(None)
    
    data_year['Mean_deviation']=list_mean_dev
    data_year.to_csv("csv_cities_by_year/"+str(year)+"_city.csv")

1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942


In [33]:
curr_t=time.time()

print("This program took : "+str(curr_t-t)+" seconds.")
# 7121 seconds

This program took : 7121.34378862381 seconds.
