# Reading + Writing a file

In [135]:
# Open the file by hand, read every line into a list, and print that list.
myfile = open('./data/ex.txt', 'r')
print(myfile.readlines())

# Be careful: if your code breaks here, the file will be left open!

myfile.close()

# This won't work as the file is now closed.
# The ValueError is caught and printed (instead of being left to propagate)
# so the demonstration does not stop a "Restart & Run All".
try:
    print(myfile.readlines())
except ValueError as err:
    print("{}: {}".format(type(err).__name__, err))


['Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo.\n', 'Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur?\n', 'Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla pariatur?\n']


ValueError: I/O operation on closed file.

In [136]:
# A "with" block is the far better approach: the file is closed
# automatically once the block is finished.

with open('./data/ex.txt', 'r') as text_file:
    all_lines = text_file.readlines()
    print(all_lines)

['Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo.\n', 'Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur?\n', 'Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla pariatur?\n']


In [63]:
# Looping over the file object yields one line at a time, which is a great way
# not to load the whole file at once. What if it's huge!
# (The previous `while line:` loop used `line` before this cell ever assigned it,
# so it failed with NameError on a fresh kernel.)

with open('./data/ex.txt', 'r') as f:
    for line in f:
        # Each line keeps its trailing "\n", so print() shows a blank line between them.
        print(line)

# write a line

In [64]:
# Oopsie, wrong mode! The file is opened with 'r' (read), so write() is refused.
# The error is caught and printed so the demonstration does not stop a
# "Restart & Run All".
import io

with open('./data/ex2.txt', 'r') as f:
    try:
        f.write("this is a new line")
    except io.UnsupportedOperation as err:
        print("{}: {}".format(type(err).__name__, err))

UnsupportedOperation: not writable

In [68]:
# Careful: even just opening a file in 'w' mode will blank it!
# It won't ask first, and the old contents won't go to the recycle bin!

with open('./data/ex3.txt', 'w') as blanked_file:
    pass

In [66]:
# Write two pieces of text; only the first one ends with a newline.
with open('./data/ex2.txt', 'w') as out_file:
    out_file.writelines(["this is a new line\n", "this is a new line"])

In [71]:
with open('./data/ex2.txt', 'w') as out_file:
    content = "this is a new line"
    out_file.write(content)

# NB: now there is only one line. Mode "w" means that opening blanks the file. CAUTION!

In [72]:
# Mode "a" stands for append: the text is written at the end of the file.
with open('./data/ex2.txt', 'a') as out_file:
    appended_text = "this is a new line\n"
    out_file.write(appended_text)

In [85]:
# Triple quotes (""" """) let you work with multi-line strings.
multi_line_text = """
this is a new file

with a new line

and another
    """

with open('./data/ex2.txt', 'a') as out_file:
    out_file.write(multi_line_text)

In [None]:
# Compare with this: a one-line literal with the newline written as an escape.
with open('./data/ex2.txt', 'a') as out_file:
    escaped_text = "this is a new file\nwith a new line"
    out_file.write(escaped_text)

In [98]:
# Read the file back in binary mode ('rb') and print the raw bytes.
with open('./data/ex2.txt', 'rb') as raw_file:
    raw_bytes = raw_file.read()
print(raw_bytes)

b'\nthis is a new file\n\nwith a new line\n\nand another\n    \nthis is a new file\n\nwith a new line\n\nand another\n    '


In [None]:
# All the possible modes can be found here:
# https://docs.python.org/3/library/functions.html#open

# It is important to be careful with "w" vs "a": "w" blanks the file when it is opened, "a" writes to the end of it.

# Looping over all files in a folder

In [140]:
import os

# The data folder is resolved relative to the current working directory, like the
# './data/...' paths used in the cells above, so no machine-specific path is needed.
# Change './data' if your files live somewhere else.
# os.fsencode turns the path into bytes.
directory = os.fsencode(os.path.abspath('./data'))
directory

b'/home/benjamin/Desktop/webinars/notebooks/data'

In [141]:
# Print every entry that os.listdir reports for the folder, one per line.
for entry in os.listdir(directory):
    print(entry)

b'eg1.xlsx'
b'ex.txt'
b'ex5.txt'
b'examples'
b'result.xlsx'
b'ex2.txt'
b'sampledatafoodsales.xlsx'
b'ex3.txt'


In [142]:
# Recursive search: "**" together with recursive=True also descends into sub-folders.
import glob

py_pattern = '/home/benjamin/Desktop/webinars/**/*.py'

for filename in glob.iglob(py_pattern, recursive=True):
    with open(filename, 'rb') as source_file:
        # Separator and file name, printed before the line itself.
        print("\n\n~~~~~~~~~~~~~~~~~~")
        print("\n", filename, "\n")
        try:
            # Third line from the end; fails for files with fewer than three lines.
            print(source_file.readlines()[-3])
        except Exception as err:
            # Show the error and carry on with the next file.
            print(repr(err))




~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/test.py 

b'print(x)\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/versioneer.py 

b'        errors += scan_setup_py()\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/setup.py 

b'        ext_modules=maybe_cythonize(extensions, compiler_directives=directives),\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/_version.py 

b'        "error": "unable to compute version",\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/__init__.py 

b'  - Time series-specific functionality: date range generation and frequency\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/conftest.py 

b'    Fixture to check if the array manager is being u

b'        progress_bar=progress_bar,\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/io/clipboard/__init__.py 

b'# pandas aliases\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/io/excel/_xlsxwriter.py 

b'                )\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/io/excel/__init__.py 

b'\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/io/excel/_xlwt.py 

b'            style.num_format_str = num_format_str\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/io/excel/_base.py 

b'            self.close()\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/io/excel/_openpyxl.py 

b'                

b'        )\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/groupby/test_allowlist.py 

b'If you removed a method, you should update them\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/groupby/test_groupby_subclass.py 

b'    # Confirm groupby.resample() preserves dataframe type\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/groupby/__init__.py 

IndexError('list index out of range')


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/groupby/test_min_max.py 

b'    expected["B"] = expected["B"].astype(ds.dtype)\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/groupby/test_libgroupby.py 

b'        ]\n'


~~~~~~~~~~~~~~~~~~

 /home/benjam


 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/io/test_compression.py 

b'    """\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/io/__init__.py 



~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/io/test_clipboard.py 

b"    # PR #25040 wide unicode wasn't copied correctly on PY3 on windows\n"


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/io/test_gcs.py 

b'def test_gcs_not_present_exception():\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/io/test_common.py 

b'    with pytest.raises(FileNotFoundError, match="\\\\[Errno 2\\\\]") as err:\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/io/

b'        return data\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/arrays/integer/test_function.py 

b'# shift\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/arrays/integer/test_arithmetic.py 

b'    tm.assert_extension_array_equal(neg_result, neg_target)\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/arrays/integer/test_concat.py 

b'    result = pd.concat([s2, s1], ignore_index=True)\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/arrays/categorical/__init__.py 

IndexError('list index out of range')


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/arrays/categorical/test_repr.py 

b'        result = repr(Categorical([1, "2", 3, 

b'        pd.Categorical(["b", "b", "a"], categories=["a", "b", "c"]), index=[1, 1, 0]\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/series/indexing/test_mask.py 



~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/series/indexing/test_set_value.py 

b'    ser2.loc["foobar"] = 0\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/series/indexing/test_indexing.py 

b'    assert s[idx1] == 2\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/series/indexing/test_getitem.py 

b'    s = Series(range(5), index=list(index_vals))\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/tests/series/indexing/test_xs.py 

b'\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/D



~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/plotting/_matplotlib/timeseries.py 

b'        raise TypeError("index type not supported")\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/plotting/_matplotlib/compat.py 

b'mpl_ge_3_2_0 = _mpl_version("3.2.0", operator.ge)\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/plotting/_matplotlib/hist.py 

b'    maybe_adjust_figure(fig, wspace=0.3, hspace=0.3)\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/plotting/_matplotlib/core.py 

b'            leglabels = labels if labels is not None else idx\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/pandas-master/pandas/plotting/_matplotlib/boxplot.py 

b'            **kwds,\n'


~~~~~~~~~~~~~~~

b"    axes_row[0].set_ylabel('C = %s' % C)\n"


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/examples/linear_model/plot_ransac.py 

b'plt.xlabel("Input")\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/examples/linear_model/plot_ard.py 

b'plt.xlabel("Feature X")\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/examples/linear_model/plot_logistic_path.py 

b"plt.title('Logistic Regression Path')\n"


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/examples/covariance/plot_covariance_estimation.py 

b'plt.legend()\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/examples/covariance/plot_lw_vs_oas.py 

b'plt.xlim(5, 31)\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/



~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/sklearn/mixture/_gaussian_mixture.py 

b'            The lower the better.\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/sklearn/mixture/tests/test_gaussian_mixture.py 

b'        "lower_bound_",\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/sklearn/mixture/tests/test_mixture.py 

b'    msg = "Expected n_samples >= n_components"\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/sklearn/mixture/tests/__init__.py 

IndexError('list index out of range')


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/sklearn/mixture/tests/test_bayesian_mixture.py 

b'            Y_pred_proba = bgmm.predict_proba(X).argmax(axis=1)\n'


~~~~~~~~~~~~~~~~~~

b'            Perplexity score.\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/sklearn/decomposition/_truncated_svd.py 

b'\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/sklearn/decomposition/__init__.py 

b'    "TruncatedSVD",\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/sklearn/decomposition/_incremental_pca.py 

b'            return np.vstack(output)\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/sklearn/decomposition/_dict_learning.py 

b'        self.inner_stats_ = (A, B)\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/sklearn/decomposition/_factor_analysis.py 

b'        var = var_new\n'


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/example



~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/sklearn/datasets/tests/data/openml/id_40675/__init__.py 

IndexError('list index out of range')


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/sklearn/datasets/tests/data/openml/id_1/__init__.py 

IndexError('list index out of range')


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/sklearn/datasets/tests/data/openml/id_2/__init__.py 

IndexError('list index out of range')


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/sklearn/datasets/tests/data/openml/id_40589/__init__.py 

IndexError('list index out of range')


~~~~~~~~~~~~~~~~~~

 /home/benjamin/Desktop/webinars/notebooks/data/examples/lotsofcode/scikit-learn-main/sklearn/datasets/tests/data/openml/id_40966/__init__.py 

IndexError('list index out

# Opening Excel files with pandas

In [143]:
import pandas as pd

In [144]:
# Load the Excel workbook into a DataFrame called df.
# NOTE(review): the "Unnamed: 0" column in the displayed frame looks like a row index
# that was saved into the workbook — confirm; if so, index_col=0 would absorb it.
df = pd.read_excel("./data/eg1.xlsx")

In [145]:
#df = pd.read_excel("/home/benjamin/Desktop/webinars/notebooks/data/eg1.xlsx")

In [130]:
# Display the DataFrame; the rendered output elides the middle rows with "...".
df

Unnamed: 0,OrderDate,Region,City,Category,Product,Quantity,UnitPrice,TotalPrice
0,2020-01-01,East,Boston,Bars,Carrot,33,1.77,
1,2020-01-04,East,Boston,Crackers,Whole Wheat,87,3.49,
2,2020-01-07,West,Los Angeles,Cookies,Chocolate Chip,58,1.87,
3,2020-01-10,East,New York,Cookies,Chocolate Chip,82,1.87,
4,2020-01-13,East,Boston,Cookies,Arrowroot,38,2.18,
...,...,...,...,...,...,...,...,...
239,2021-12-18,East,Boston,Cookies,Arrowroot,34,2.18,
240,2021-12-21,East,Boston,Cookies,Chocolate Chip,245,1.87,
241,2021-12-24,East,Boston,Crackers,Whole Wheat,30,3.49,
242,2021-12-27,West,Los Angeles,Bars,Bran,30,1.87,


In [131]:
# New column: the cost of each row, i.e. quantity multiplied by unit price.
df['total_cost'] = df['Quantity'].mul(df['UnitPrice'])

In [132]:
# Display the DataFrame again to check the newly added total_cost column.
df

Unnamed: 0,OrderDate,Region,City,Category,Product,Quantity,UnitPrice,TotalPrice,total_cost
0,2020-01-01,East,Boston,Bars,Carrot,33,1.77,,58.41
1,2020-01-04,East,Boston,Crackers,Whole Wheat,87,3.49,,303.63
2,2020-01-07,West,Los Angeles,Cookies,Chocolate Chip,58,1.87,,108.46
3,2020-01-10,East,New York,Cookies,Chocolate Chip,82,1.87,,153.34
4,2020-01-13,East,Boston,Cookies,Arrowroot,38,2.18,,82.84
...,...,...,...,...,...,...,...,...,...
239,2021-12-18,East,Boston,Cookies,Arrowroot,34,2.18,,74.12
240,2021-12-21,East,Boston,Cookies,Chocolate Chip,245,1.87,,458.15
241,2021-12-24,East,Boston,Crackers,Whole Wheat,30,3.49,,104.70
242,2021-12-27,West,Los Angeles,Bars,Bran,30,1.87,,56.10


In [134]:
# Save the DataFrame to a new Excel workbook.
# NOTE(review): to_excel also writes the row index by default; if that extra column
# is not wanted in result.xlsx, pass index=False — confirm intent.
df.to_excel("./data/result.xlsx")