# Day 4 - Opening Doors to Your Data

## Reading Text Files

In [None]:
# Read every line of the file into a list of strings. The `with` block closes
# the file handle automatically, even if reading fails part-way through.
with open("biomarker.csv", "r") as f:
	lines = f.readlines()

In [5]:
lines[0]

'pDisease,biomarker\n'

In [None]:
# readlines() keeps the trailing "\n" on every entry, so tell print() not to
# append a second newline; otherwise the output is double-spaced.
for line in lines:
	print(line, end="")

pDisease,biomarker
0.0,0.0
0.05,0.5128205128205128
0.1,0.6896551724137931
0.15000000000000002,0.7792207792207793
0.2,0.8333333333333334
0.25,0.8695652173913043
0.30000000000000004,0.8955223880597016
0.35000000000000003,0.9150326797385621
0.4,0.9302325581395349
0.45,0.9424083769633508
0.5,0.9523809523809523
0.55,0.9606986899563319
0.6000000000000001,0.967741935483871
0.65,0.9737827715355805
0.7000000000000001,0.979020979020979
0.75,0.9836065573770492
0.8,0.9876543209876543
0.8500000000000001,0.9912536443148688
0.9,0.994475138121547
0.9500000000000001,0.9973753280839895
1.0,1.0


## Type Conversion

In [19]:
# Take the text before the first comma of row 7, convert it from str to float
# so it can be used in arithmetic, then multiply. The result shows ordinary
# floating-point rounding.
float(lines[7].split(",")[0]) * 5

1.5000000000000002

In [21]:
"My number is " + str(7)

'My number is 7'

## Exception Handling

In [34]:
def get_first_element_from_line(linenumber):
	"""Return the first comma-separated field of ``lines[linenumber]`` as a float.

	Reads the notebook-level ``lines`` list.

	Parameters:
		linenumber: index into ``lines``.

	Returns:
		The leading field converted with ``float``. If ``linenumber`` is
		negative or not smaller than ``len(lines)``, a warning is printed
		and 0.0 is returned instead.

	Raises:
		TypeError: if ``linenumber`` passes the range check but is not a
			valid list index (for example 1.5).
		ValueError: if the leading field is not a number (for example the
			header row).
	"""
	out_of_range = linenumber < 0 or linenumber >= len(lines)
	if out_of_range:
		print("Warning, value outside range - returning 0")
		return 0.0
	# Everything before the first comma is the first column.
	leading_field, _sep, _rest = lines[linenumber].partition(",")
	return float(leading_field)

In [25]:
get_first_element_from_line(9)

0.4

In [35]:
get_first_element_from_line(50)



0.0

In [None]:
try:
	get_first_element_from_line(1.5)
except Exception: # catch every error (but still let Ctrl-C / kernel interrupt through)
	print("Something went wrong!")

Something went wrong!


In [45]:
# Row 0 is the header ("pDisease,biomarker"), so its first field is text and
# float() raises ValueError. Naming the exception type catches only that
# specific failure; any other kind of error would still propagate.
try:
	get_first_element_from_line(0)
except ValueError:
	print("Something went wrong with the conversion to float!")

Something went wrong with the conversion to float!


In [33]:
get_first_element_from_line(1.5)

TypeError: list indices must be integers or slices, not float

In [46]:
1234 + float("1.1")

1235.1

In [47]:
'x' + 'x'

'xx'

In [48]:
float('x') + float('x')

ValueError: could not convert string to float: 'x'

## Recap on lists, dictionaries and DataFrames

### Lists

In [50]:
my_list = [1, 2.2, "bla", [1,2,3]]
my_list

[1, 2.2, 'bla', [1, 2, 3]]

In [51]:
len(my_list)

4

In [52]:
my_list[2]

'bla'

### Dictionaries

In [53]:
my_dict = {"integer": 1, "float": 2.2, "string": "bla", "list": [1,2,3]}

In [57]:
my_dict["list"] = [2, 3]

In [58]:
my_dict

{'integer': 1, 'float': 2.2, 'string': 'bla', 'list': [2, 3]}

In [59]:
my_dict["dict"] = {"a": 1, "b": 2}

In [60]:
my_dict

{'integer': 1,
 'float': 2.2,
 'string': 'bla',
 'list': [2, 3],
 'dict': {'a': 1, 'b': 2}}

In [61]:
my_dict["second integer"] = 1

In [62]:
my_dict

{'integer': 1,
 'float': 2.2,
 'string': 'bla',
 'list': [2, 3],
 'dict': {'a': 1, 'b': 2},
 'second integer': 1}

### DataFrames

In [63]:
import pandas as pd

#### Dictionary of Lists

In [71]:
df = pd.DataFrame({"col1": [1,2,3,4,7,2], "col2": [.1,.7,.2,3,1,.5]})
df

Unnamed: 0,col1,col2
0,1,0.1
1,2,0.7
2,3,0.2
3,4,3.0
4,7,1.0
5,2,0.5


In [72]:
df.dtypes

col1      int64
col2    float64
dtype: object

In [75]:
df["col3"] = [1,2,3,4,5,6]

In [76]:
df

Unnamed: 0,col1,col2,col3
0,1,0.1,1
1,2,0.7,2
2,3,0.2,3
3,4,3.0,4
4,7,1.0,5
5,2,0.5,6


In [77]:
# Assigning a single scalar creates the column and fills every row with that value.
df["replicate"] = 1

In [78]:
df

Unnamed: 0,col1,col2,col3,replicate
0,1,0.1,1,1
1,2,0.7,2,1
2,3,0.2,3,1
3,4,3.0,4,1
4,7,1.0,5,1
5,2,0.5,6,1


#### List of Dictionaries

In [82]:
# Each dictionary is one row and its keys become column names. A key that is
# absent from a record is left as a missing value in that row.
pd.DataFrame([{"a": 1, "b": 2, "c": "bla"}, {"b": 3, "a": 7}, {"b": -1}])

Unnamed: 0,a,b,c
0,1.0,2,bla
1,7.0,3,
2,,-1,
