In [1]:
# Core data-handling libraries used throughout the notebook.
import pandas as pd
import numpy as np
import os
# Colab-only step: mount Google Drive so files under /content/drive can be read.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
# Load the sampled NASA table from the CSV stored on the mounted Google Drive.
NASA = pd.read_csv('/content/drive/MyDrive/sampled_NASA_200k.csv')

### Część I: Anotacja danych

In [3]:
# Columns kept for the annotation work; some were added at Iza's suggestion
# and are not used yet.
selected_columns = [
    "lon", "lat", "Date",
    "Rainf", "Evap", "AvgSurfT", "Albedo", "SoilT_40_100cm", "GVEG",
    "PotEvap", "RootMoist", "SoilM_100_200cm",
]
# Restrict the table to those columns and discard rows with any missing value.
NASA_latest = NASA.loc[:, selected_columns].dropna()

In [4]:
# Keep only records from 2023 (January to September, per the original author's note);
# single-month subsets were too scattered and did not cluster around the areas of interest.
# Date values look like YYYYMM (e.g. 202305), so the year is matched as a prefix
# rather than as a substring that could appear anywhere in the value.
NASA_latest = NASA_latest[NASA_latest["Date"].astype(str).str.startswith("2023")]

In [5]:
# Keep a single record per (lat, lon) grid point. drop_duplicates retains the first
# occurrence, so the month of the surviving record can differ from point to point.
NASA_latest = NASA_latest.drop_duplicates(subset=["lat", "lon"])

##### Etykietowanie będzie opierać się na założeniu, iż każdy obszar jest albo pustynią, albo stepem/półpustynią (oznaczanym dalej jako „step”, ze względu na dominację terenów stepowych nad półpustynnymi), albo (w wyniku odrzucenia obu tych opcji) terenem innym. Do anotacji danych posługuję się widokiem satelitarnym Google Maps.


### Obszary pustynne -- weryfikacja

In [6]:
# Rectangular lon/lat windows (limits inclusive) sampled for manual verification of desert areas.
# NOTE(review): CD / CP / GBD presumably stand for Chihuahuan Desert, Colorado Plateau
# and Great Basin Desert — confirm with the author.
# .copy() makes every subset an independent frame, so the label columns assigned to it
# later are written without pandas' SettingWithCopyWarning.
CD = NASA_latest[NASA_latest['lon'].between(-104, -102) & NASA_latest['lat'].between(30, 31)].copy()
CP = NASA_latest[NASA_latest['lon'].between(-110.5, -108.5) & NASA_latest['lat'].between(39, 40.5)].copy()
GBD = NASA_latest[NASA_latest['lon'].between(-116, -114) & NASA_latest['lat'].between(40, 41.5)].copy()

In [7]:
CD

Unnamed: 0,lon,lat,Date,Rainf,Evap,AvgSurfT,Albedo,SoilT_40_100cm,GVEG,PotEvap,RootMoist,SoilM_100_200cm
9359,-102.8125,30.9375,202305,5.427499,11.4938,299.5639,28.28495,293.0104,0.150001,313.3566,124.9108,166.2296
18929,-102.6875,30.4375,202304,0.4722,11.6929,289.4097,36.44028,286.7074,0.153661,240.376,151.7887,164.2267
32004,-103.0625,30.0625,202307,1.8527,9.266803,302.5091,37.99866,299.2646,0.096239,414.4146,195.8728,212.9337
56768,-102.5625,30.1875,202302,18.2387,17.04342,281.4062,38.99851,281.1125,0.114679,167.2516,211.1524,227.6131
85902,-103.3125,30.9375,202307,6.8308,10.36514,305.9793,27.92204,301.4564,0.108742,467.3901,262.8195,285.2028
89929,-103.8125,30.0625,202307,12.0056,18.87968,300.715,29.52554,297.5549,0.098821,404.0517,185.5338,199.4327
96495,-103.0625,30.6875,202306,18.3156,21.72756,303.1476,31.23611,297.1132,0.166519,332.7293,152.3606,235.5295
112096,-103.3125,30.8125,202306,16.597,22.1615,302.5942,28.97639,296.9542,0.149019,461.7052,170.3784,186.0652
124006,-103.0625,30.5625,202306,17.9105,25.34678,301.3902,32.89444,296.2012,0.196338,321.263,114.5642,155.2069
139411,-103.4375,30.1875,202302,19.96389,12.3738,280.5674,33.05059,280.1028,0.094678,144.235,174.3004,166.9773


In [8]:
CP

Unnamed: 0,lon,lat,Date,Rainf,Evap,AvgSurfT,Albedo,SoilT_40_100cm,GVEG,PotEvap,RootMoist,SoilM_100_200cm
9974,-109.6875,39.0625,202301,21.6874,7.799999,270.7198,24.45564,271.689,0.065519,52.45818,349.0597,138.8902
12884,-109.0625,39.3125,202306,11.602,28.69868,293.4305,22.36528,287.4216,0.332745,346.2859,175.7806,214.206
42117,-109.0625,39.8125,202304,20.7805,28.59708,279.3553,21.33055,272.9188,0.214691,160.3139,282.5334,204.45
45319,-109.8125,39.9375,202305,6.5506,19.24691,292.5058,21.26613,284.1776,0.120917,296.497,209.0736,230.8323
47483,-109.6875,40.0625,202301,44.6427,4.810997,272.7689,25.15188,272.7303,0.002207,23.00107,235.116,172.5024
48289,-109.8125,39.3125,202309,6.459199,21.01628,283.2909,23.49722,285.0117,0.331013,260.8564,276.1352,127.4177
72659,-108.6875,39.5625,202301,15.2788,16.85541,265.0864,52.6371,269.2209,0.062224,20.8246,404.5396,138.9843
96077,-110.3125,39.0625,202305,10.0748,24.59349,292.9406,22.80511,285.1188,0.064824,309.1351,227.2755,251.8101
100144,-110.3125,40.0625,202306,16.3966,29.8545,291.9973,23.10417,285.2585,0.262516,296.0976,192.7629,204.5311
112183,-108.8125,39.9375,202304,21.8454,33.64608,278.5593,20.82639,272.8708,0.193532,210.8214,270.5662,195.2379


In [9]:
GBD

Unnamed: 0,lon,lat,Date,Rainf,Evap,AvgSurfT,Albedo,SoilT_40_100cm,GVEG,PotEvap,RootMoist,SoilM_100_200cm
7724,-115.9375,41.0625,202304,13.89151,24.08589,277.1928,28.12778,271.711,0.169824,128.6094,338.5611,204.7021
17848,-115.8125,41.0625,202308,45.3807,39.79107,293.8333,26.90322,289.7812,0.136204,214.8309,204.7466,235.6484
23209,-115.3125,41.1875,202301,48.1774,2.5992,264.6199,66.11828,271.6496,0.002207,2.760081,374.286,230.9133
27914,-115.5625,41.1875,202303,59.98549,18.5645,270.039,46.0336,269.6846,0.063446,53.47971,364.3288,211.9182
48063,-115.5625,41.4375,202304,19.16611,27.43222,277.1151,27.71528,271.9406,0.150803,157.4522,335.552,214.2337
67803,-114.1875,40.4375,202306,11.24699,23.04858,290.7816,37.9875,285.2151,0.08652,235.9082,196.8165,198.4986
97001,-114.5625,40.9375,202302,3.717599,3.611002,261.3431,65.32738,266.6818,0.02278,4.264136,291.3078,193.6302
119050,-115.9375,40.3125,202307,10.53541,41.9226,295.4646,27.03763,284.3665,0.222856,282.4388,211.2412,264.0882
143714,-115.8125,40.1875,202302,13.8412,8.256295,268.64,42.11607,271.7033,0.011187,28.49479,396.0365,247.7887


In [None]:
# Manual labels, one entry per row in the frame's current row order:
# 'pustynia' = desert, 'step' = steppe/semi-desert; 0 in both columns means "other" terrain.
# NOTE(review): each list must be exactly as long as its frame, otherwise pandas raises
# ValueError. The CD and CP tables displayed above show fewer rows than these lists
# contain, so the saved outputs appear to come from a different run — re-run and confirm.
CD['pustynia'] = [1,0,1,0,1,1,1,1,1,1,0]
CD['step'] = [0,1,0,1,0,0,0,0,0,0,1]
CP['pustynia'] = [0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0]
CP['step'] = [1,0,1,0,1,0,1,0,0,1,1,1,1,0,0,1]
GBD['pustynia'] = [1,1,1,1,1,1,0,1,1]
GBD['step'] = [0,0,0,0,0,0,1,0,0]

### Obszary pustynno-niepustynne -- weryfikacja

In [11]:
# Rectangular lon/lat windows (limits inclusive) sampled for manual verification of
# areas that mix desert and non-desert terrain.
# .copy() makes every subset an independent frame, so the label columns assigned to it
# later are written without pandas' SettingWithCopyWarning.
CD_i_niepustynia = NASA_latest[NASA_latest['lon'].between(-106.5, -104.5) & NASA_latest['lat'].between(32.5, 33.5)].copy()
CP_i_niepustynia = NASA_latest[NASA_latest['lon'].between(-109, -107) & NASA_latest['lat'].between(37.5, 39)].copy()
GBD_i_niepustynia = NASA_latest[NASA_latest['lon'].between(-115, -113) & NASA_latest['lat'].between(42.5, 44)].copy()

In [12]:
CD_i_niepustynia

Unnamed: 0,lon,lat,Date,Rainf,Evap,AvgSurfT,Albedo,SoilT_40_100cm,GVEG,PotEvap,RootMoist,SoilM_100_200cm
18848,-105.4375,32.5625,202303,3.8746,13.15041,279.0908,30.56452,277.7809,0.132503,166.4814,179.7137,183.7663
31930,-104.9375,32.5625,202301,1.5016,12.52171,274.9598,26.27285,277.2696,0.100762,105.3368,204.987,225.1061
110488,-105.0625,32.6875,202303,1.0662,8.970009,280.6528,27.25134,279.302,0.111205,275.7034,190.7704,229.5919
110849,-104.5625,32.9375,202301,3.1167,2.0982,276.7611,25.65726,278.6888,0.082208,89.11093,74.78488,126.588
133948,-104.8125,32.6875,202309,33.97,34.20612,297.146,26.14444,296.9987,0.211357,282.7884,181.1602,190.7903
155624,-105.8125,33.1875,202306,28.2376,50.15289,290.0075,35.57778,284.917,0.391781,403.111,429.1685,218.1277
183273,-104.9375,33.0625,202304,0.1748,4.272196,285.9594,27.02778,282.5613,0.112026,337.2133,180.3347,207.1992


In [13]:
CP_i_niepustynia

Unnamed: 0,lon,lat,Date,Rainf,Evap,AvgSurfT,Albedo,SoilT_40_100cm,GVEG,PotEvap,RootMoist,SoilM_100_200cm
5395,-108.3125,38.6875,202303,41.56859,28.7347,275.3177,19.31183,273.0527,0.160943,130.6344,302.175,232.7178
14157,-108.8125,37.8125,202304,5.632401,31.93938,276.756,28.14722,272.5503,0.267994,240.0859,485.3907,179.2533
23546,-108.5625,38.0625,202301,47.43541,9.7887,272.107,26.33602,272.8308,0.093554,36.33562,264.464,154.8964
29095,-107.9375,37.8125,202306,44.68142,27.23099,273.872,38.13334,271.1082,0.706376,200.3576,778.8061,404.6041
51418,-107.6875,38.1875,202304,34.9998,36.8722,276.0361,17.33333,272.0784,0.267471,225.1446,529.9483,218.641
96670,-107.5625,38.9375,202303,59.53609,32.21329,272.4331,19.82661,271.3754,0.119452,139.4796,482.8987,163.397
100154,-108.6875,37.6875,202301,33.4568,17.78769,267.4477,44.16532,270.8058,0.12318,29.00659,378.2711,132.2829
117428,-107.5625,37.9375,202302,0.0919,19.135,249.7137,62.8125,269.1786,0.028164,22.38453,301.2202,418.574
128282,-108.1875,38.3125,202308,43.4254,32.99941,287.8529,21.0457,278.9937,0.628949,308.5474,487.1653,269.5126
131019,-108.5625,38.3125,202306,12.4044,63.34715,290.692,22.40278,284.9901,0.478268,322.347,152.4087,205.8036


In [14]:
GBD_i_niepustynia

Unnamed: 0,lon,lat,Date,Rainf,Evap,AvgSurfT,Albedo,SoilT_40_100cm,GVEG,PotEvap,RootMoist,SoilM_100_200cm
1974,-114.4375,43.1875,202302,4.0176,6.3837,267.9365,42.83036,268.1578,0.04808,27.79317,222.7477,160.6388
2597,-113.5625,43.4375,202308,45.0378,35.2344,292.1919,33.01075,287.9557,0.164972,205.4857,208.4921,247.3011
4286,-114.4375,43.4375,202307,0.6999,48.15249,293.2379,31.95027,282.5198,0.451744,293.205,173.3846,220.6463
4539,-113.6875,43.3125,202306,38.32759,47.29372,289.7093,29.02222,281.198,0.238218,302.8073,517.8953,285.0337
35485,-113.9375,43.0625,202309,39.87291,21.36901,289.8605,32.075,290.2597,0.170522,190.4851,171.2929,206.2339
35803,-114.5625,43.5625,202303,18.055,29.1023,260.8942,70.85349,270.0521,0.063344,30.75147,509.0996,203.7366
52038,-113.0625,42.6875,202306,37.5996,64.89129,291.3746,29.28472,285.654,0.520004,220.6047,123.8462,147.8618
71940,-114.1875,42.5625,202303,32.77078,22.7801,274.5293,32.9664,272.3167,0.139594,76.91534,251.3051,196.9237
72871,-114.8125,42.8125,202304,8.184402,21.9051,282.6422,25.84167,278.3638,0.340216,182.7968,148.3329,103.4825
94427,-113.5625,43.8125,202308,86.3863,46.03413,288.0577,33.01075,282.5513,0.351327,181.1579,218.5492,258.7739


In [None]:
# Manual labels for the mixed regions, one entry per row in the frame's current row order:
# 'pustynia' = desert, 'step' = steppe/semi-desert; 0 in both columns means "other" terrain.
# NOTE(review): each list must be exactly as long as its frame, otherwise pandas raises
# ValueError. The CP and GBD tables displayed above show fewer rows than these lists
# contain, so the saved outputs appear to come from a different run — re-run and confirm.
CD_i_niepustynia['pustynia'] = [0,0,0,0,1,0,1]
CD_i_niepustynia['step'] = [1,1,1,1,0,0,0]
CP_i_niepustynia['pustynia'] = [1,0,0,0,0,0,0,0,0,0,0,1,0,0]
CP_i_niepustynia['step'] = [0,1,1,0,0,0,0,1,0,1,1,0,0,1]
GBD_i_niepustynia['pustynia'] = [0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,1,0,0]
GBD_i_niepustynia['step'] = [1,1,1,0,0,0,1,1,0,1,1,0,1,1,1,0,1,1]

### Obszary niepustynne -- weryfikacja

In [16]:
# Rectangular lon/lat windows (limits inclusive) sampled for manual verification of
# non-desert areas located near each desert region.
# .copy() makes every subset an independent frame, so the label columns assigned to it
# later are written without pandas' SettingWithCopyWarning.
niepustynia_przy_CD = NASA_latest[NASA_latest['lon'].between(-109.5, -107.5) & NASA_latest['lat'].between(33, 34)].copy()
niepustynia_przy_CP = NASA_latest[NASA_latest['lon'].between(-107, -105) & NASA_latest['lat'].between(39, 40.5)].copy()
niepustynia_przy_GBD = NASA_latest[NASA_latest['lon'].between(-124, -122) & NASA_latest['lat'].between(39.5, 41)].copy()

In [17]:
niepustynia_przy_CD

Unnamed: 0,lon,lat,Date,Rainf,Evap,AvgSurfT,Albedo,SoilT_40_100cm,GVEG,PotEvap,RootMoist,SoilM_100_200cm
14852,-109.3125,33.4375,202308,48.26299,66.48133,295.0195,27.34274,292.2154,0.382462,347.1584,360.9653,181.8461
53380,-108.4375,33.6875,202304,0.0456,25.13867,279.9706,23.19167,273.0999,0.322026,223.8354,278.4115,239.9354
55193,-107.6875,33.8125,202302,10.2434,16.312,272.1968,27.54316,272.8661,0.212485,75.96273,277.1209,244.5791
88469,-109.1875,33.4375,202304,0.0,21.3849,284.0562,21.26667,280.0602,0.3333,409.9839,448.8846,238.2996
100755,-107.9375,33.1875,202305,40.9404,43.20212,283.7792,26.66801,275.5688,0.402229,329.8168,505.8405,259.0211
102911,-107.9375,33.4375,202307,14.1134,31.9658,294.2789,30.17473,287.5587,0.344941,325.8191,431.119,228.7828
144237,-109.4375,33.6875,202303,34.79459,33.72149,272.8934,25.0,273.0006,0.386447,165.7187,608.3927,260.4124
164042,-109.3125,33.0625,202308,32.26001,35.5727,301.9285,32.30376,298.6757,0.20246,381.4294,187.9955,213.1467
182469,-108.9375,33.3125,202309,35.1911,52.64795,292.5587,28.025,292.9172,0.354819,312.7836,471.3717,233.8724
189196,-108.4375,33.8125,202307,21.4192,40.5486,293.6267,25.45161,286.5213,0.372554,326.9028,407.189,214.6782


In [18]:
niepustynia_przy_CP

Unnamed: 0,lon,lat,Date,Rainf,Evap,AvgSurfT,Albedo,SoilT_40_100cm,GVEG,PotEvap,RootMoist,SoilM_100_200cm
1840,-106.0625,39.1875,202308,51.2118,35.73089,284.2452,20.02285,278.0268,0.497524,279.6495,402.0955,216.3131
13548,-106.5625,39.9375,202303,36.28029,30.57779,270.1275,23.44624,269.4037,0.096973,128.8241,528.0943,211.0841
25366,-105.9375,40.1875,202309,9.368398,23.4095,281.511,20.07083,279.6938,0.506176,212.1331,433.7609,222.2454
36018,-106.8125,39.9375,202302,3.781399,8.812499,262.9857,50.00744,266.2602,0.02535,15.04345,290.4982,192.0873
36901,-106.8125,40.4375,202306,49.34617,37.53431,285.8131,20.09167,273.1193,0.773962,333.4099,642.6102,354.0271
40054,-106.6875,39.9375,202302,3.718299,9.515096,263.8924,44.38393,266.5998,0.027542,23.19747,290.9669,193.2207
63347,-105.6875,39.8125,202306,69.0554,37.54749,279.7713,20.71806,271.7418,0.55445,232.7507,636.7927,393.9274
81919,-105.5625,39.3125,202307,37.3594,35.827,285.5029,22.80242,276.0515,0.504824,277.9523,439.7804,227.0565
85013,-105.4375,39.8125,202305,110.7559,38.79239,281.9957,21.89785,272.446,0.481862,239.7324,457.3399,192.2736
85320,-105.1875,40.0625,202307,57.33041,95.03979,296.6759,25.31317,288.9234,0.510001,251.3271,208.9057,219.3828


In [19]:
niepustynia_przy_GBD

Unnamed: 0,lon,lat,Date,Rainf,Evap,AvgSurfT,Albedo,SoilT_40_100cm,GVEG,PotEvap,RootMoist,SoilM_100_200cm
6308,-122.9375,40.8125,202304,29.8479,22.2782,278.9803,21.67083,274.4977,0.647598,191.6964,540.7118,274.275
8964,-122.8125,39.8125,202301,306.9989,18.6023,273.2296,28.65995,275.7784,0.482382,55.41656,590.8242,298.6456
22610,-122.4375,39.9375,202304,11.248,31.2452,285.3778,35.67778,280.9853,0.621454,229.6904,248.7197,268.1487
23554,-122.1875,40.6875,202302,117.792,21.15431,277.1375,24.91369,278.2955,0.473106,97.42831,522.2635,263.8248
30011,-122.8125,40.1875,202306,4.430602,69.19132,290.262,27.58472,284.747,0.618839,327.2562,414.369,216.0322
43241,-123.3125,40.8125,202301,326.5494,19.44508,274.9902,23.43414,277.5238,0.60221,70.25388,546.3722,277.4744
44284,-123.3125,40.3125,202303,325.9943,18.55751,268.6375,69.46102,274.7626,0.614836,21.24422,602.0575,293.4288
61697,-122.4375,40.0625,202303,175.8716,45.06342,280.3387,34.6922,279.2028,0.57316,143.7582,552.2076,275.2977
62570,-122.9375,39.5625,202306,2.7224,48.04777,285.4928,27.65694,280.3745,0.745978,282.7866,463.9562,242.123
66999,-123.8125,40.9375,202306,5.1724,36.19778,287.4891,18.31944,282.5106,0.940979,324.6035,457.3374,234.0978


In [None]:
# Manual labels for the non-desert regions, one entry per row in the frame's current row order:
# 'pustynia' = desert, 'step' = steppe/semi-desert; 0 in both columns means "other" terrain.
# A scalar 0 is broadcast to every row of the frame (no desert points in that region).
# NOTE(review): each list must be exactly as long as its frame, otherwise pandas raises
# ValueError. The CP and GBD tables displayed above show fewer rows than these lists
# contain, so the saved outputs appear to come from a different run — re-run and confirm.
niepustynia_przy_CD['pustynia'] = [0,0,1,0,0,0,0,0,0,0]
niepustynia_przy_CD['step'] = [1,1,0,1,0,0,0,1,0,0]
niepustynia_przy_CP['pustynia'] = 0
niepustynia_przy_CP['step'] = [0,1,0,0,0,1,0,0,0,0,0,0,0,1]
niepustynia_przy_GBD['pustynia'] = 0
niepustynia_przy_GBD['step'] = [0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0]

In [21]:
# Stack the nine labelled regional samples into one table; ignore_index=True replaces
# the original row labels with a fresh 0..n-1 index.
NASA_verified = pd.concat([CD, CP, GBD, CD_i_niepustynia, CP_i_niepustynia, GBD_i_niepustynia,
                           niepustynia_przy_CD, niepustynia_przy_CP, niepustynia_przy_GBD], ignore_index=True)

In [22]:
NASA_verified

Unnamed: 0,lon,lat,Date,Rainf,Evap,AvgSurfT,Albedo,SoilT_40_100cm,GVEG,PotEvap,RootMoist,SoilM_100_200cm,pustynia,step
0,-102.8125,30.9375,202305,5.427499,11.493800,299.5639,28.28495,293.0104,0.150001,313.3566,124.9108,166.2296,1,0
1,-102.6875,30.4375,202304,0.472200,11.692900,289.4097,36.44028,286.7074,0.153661,240.3760,151.7887,164.2267,0,1
2,-103.0625,30.0625,202307,1.852700,9.266803,302.5091,37.99866,299.2646,0.096239,414.4146,195.8728,212.9337,1,0
3,-102.5625,30.1875,202302,18.238700,17.043420,281.4062,38.99851,281.1125,0.114679,167.2516,211.1524,227.6131,0,1
4,-103.3125,30.9375,202307,6.830800,10.365140,305.9793,27.92204,301.4564,0.108742,467.3901,262.8195,285.2028,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,-123.6875,40.3125,202302,185.714100,28.289890,275.9242,23.84822,278.2262,0.659508,97.8793,540.8552,269.9910,0,0
115,-123.3125,40.5625,202305,25.272700,48.775900,285.2950,20.97581,279.9535,0.767146,240.4918,440.3339,225.0202,0,0
116,-123.3125,40.6875,202307,0.000000,50.684380,292.6227,19.99462,286.2032,0.844853,389.1568,363.1833,186.2789,0,0
117,-122.8125,39.5625,202304,28.344990,25.580200,278.2589,28.51805,275.1497,0.619477,191.9321,542.9021,279.2621,0,0


In [23]:
# Number of points manually labelled as desert (labels are 0/1, so the sum is a count).
NASA_verified['pustynia'].sum()

31

In [24]:
# Number of points manually labelled as steppe/semi-desert.
NASA_verified['step'].sum()

47

In [25]:
# Number of remaining points, i.e. total rows minus the steppe and desert counts.
len(NASA_verified) - NASA_verified['step'].sum() - NASA_verified['pustynia'].sum()

41

### Część II: Weryfikacja zgodności zanotowanych danych z przewidywaniami klasyfikatora

In [26]:
# Cut-off values read by classify(), one per input column ("graniczne" = "threshold").
# NOTE(review): the units and the provenance of these values are not shown in this
# notebook — confirm with the author.
GVEG_graniczne = 0.333
Rainf_graniczne = 28
Evap_graniczne = 33
AvgSurfT_graniczne = 289
Albedo_graniczne = 26.7
SoilT_40_100cm_graniczne = 286

In [27]:
def classify(row: pd.Series, min_conditions: int = 4) -> str:
    """Label one record as desert or non-desert by counting satisfied criteria.

    Six criteria are checked against the module-level ``*_graniczne`` thresholds:
    ``Rainf``, ``Evap`` and ``GVEG`` must be at or below their thresholds, while
    ``AvgSurfT``, ``Albedo`` and ``SoilT_40_100cm`` must be at or above theirs.

    Args:
        row: A single record indexable by the six column names above, e.g. the
            Series that ``DataFrame.apply(classify, axis=1)`` passes for each row.
        min_conditions: Minimum number of satisfied criteria required for the
            desert label. Defaults to 4 (out of 6).

    Returns:
        ``"pustynia"`` (desert) when at least ``min_conditions`` criteria hold,
        otherwise ``"nie-pustynia"`` (non-desert).
    """
    conditions = [
        row['Rainf'] <= Rainf_graniczne,
        row['Evap'] <= Evap_graniczne,
        row['GVEG'] <= GVEG_graniczne,
        row['AvgSurfT'] >= AvgSurfT_graniczne,
        row['Albedo'] >= Albedo_graniczne,
        row['SoilT_40_100cm'] >= SoilT_40_100cm_graniczne,
    ]
    # A comparison against NaN evaluates to False, so a missing value simply counts
    # as an unmet criterion; a plain count of the True entries is all that is needed.
    satisfied = sum(bool(flag) for flag in conditions)
    if satisfied >= min_conditions:
        return "pustynia"
    return "nie-pustynia"

In [28]:
# Run the rule-based classifier on every row (axis=1) and store its label next to
# the manual annotations.
NASA_verified['klasyfikacja'] = NASA_verified.apply(classify, axis=1)

In [29]:
NASA_verified

Unnamed: 0,lon,lat,Date,Rainf,Evap,AvgSurfT,Albedo,SoilT_40_100cm,GVEG,PotEvap,RootMoist,SoilM_100_200cm,pustynia,step,klasyfikacja
0,-102.8125,30.9375,202305,5.427499,11.493800,299.5639,28.28495,293.0104,0.150001,313.3566,124.9108,166.2296,1,0,pustynia
1,-102.6875,30.4375,202304,0.472200,11.692900,289.4097,36.44028,286.7074,0.153661,240.3760,151.7887,164.2267,0,1,pustynia
2,-103.0625,30.0625,202307,1.852700,9.266803,302.5091,37.99866,299.2646,0.096239,414.4146,195.8728,212.9337,1,0,pustynia
3,-102.5625,30.1875,202302,18.238700,17.043420,281.4062,38.99851,281.1125,0.114679,167.2516,211.1524,227.6131,0,1,pustynia
4,-103.3125,30.9375,202307,6.830800,10.365140,305.9793,27.92204,301.4564,0.108742,467.3901,262.8195,285.2028,1,0,pustynia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,-123.6875,40.3125,202302,185.714100,28.289890,275.9242,23.84822,278.2262,0.659508,97.8793,540.8552,269.9910,0,0,nie-pustynia
115,-123.3125,40.5625,202305,25.272700,48.775900,285.2950,20.97581,279.9535,0.767146,240.4918,440.3339,225.0202,0,0,nie-pustynia
116,-123.3125,40.6875,202307,0.000000,50.684380,292.6227,19.99462,286.2032,0.844853,389.1568,363.1833,186.2789,0,0,nie-pustynia
117,-122.8125,39.5625,202304,28.344990,25.580200,278.2589,28.51805,275.1497,0.619477,191.9321,542.9021,279.2621,0,0,nie-pustynia


In [35]:
# Labelled as desert but classified as non-desert (7 cases in the recorded run).
missed_deserts = (
    NASA_verified['pustynia'].eq(1)
    & NASA_verified['klasyfikacja'].eq("nie-pustynia")
)
# Labelled as neither desert nor steppe, yet classified as desert (4 cases in the recorded run).
false_deserts = (
    NASA_verified['pustynia'].eq(0)
    & NASA_verified['step'].eq(0)
    & NASA_verified['klasyfikacja'].eq("pustynia")
)
# Total number of clear-cut misclassifications, then their share of all verified points.
worst_cases = (missed_deserts | false_deserts).sum()

print(worst_cases)
print(worst_cases / len(NASA_verified))

11
0.09243697478991597


In [36]:
# How the classifier splits the points labelled as steppe: for each predicted label,
# print the count and then its share of all steppe points.
is_step = NASA_verified['step'] == 1
step_total = is_step.sum()
for predicted_label in ("nie-pustynia", "pustynia"):
    step_hits = (is_step & (NASA_verified['klasyfikacja'] == predicted_label)).sum()
    print(step_hits)
    print(step_hits / step_total)

30
0.6382978723404256
17
0.3617021276595745


##### Możemy zaobserwować, że ok. 64% terenów stepowych/półpustynnych zostało zaklasyfikowanych jako tereny niepustynne, zaś pozostałe 36% jako pustynie.

In [37]:
# Share of correctly classified points among those NOT labelled as steppe:
# (non-steppe points - clear-cut misclassifications) / non-steppe points.
non_step_total = len(NASA_verified) - NASA_verified['step'].sum()
(non_step_total - worst_cases) / non_step_total

0.8472222222222222

##### Po wyłączeniu terenów stepowych ok. 84,7% pozostałych terenów (pustynnych lub niepustynnych) zostało poprawnie oznaczonych przez klasyfikator.