Skip to content

Commit

Permalink
Merge pull request scikit-learn#3612 from calvingiles/fix-resource-warnings
Browse files Browse the repository at this point in the history

[MRG+1] Fixed ResourceWarnings from inside scikit-learn. Fixes scikit-learn#3410.
  • Loading branch information
ogrisel authored and IssamLaradji committed Oct 13, 2014
2 parents 00968d2 + f97912f commit 405df05
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 26 deletions.
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@

DISTNAME = 'scikit-learn'
DESCRIPTION = 'A set of python modules for machine learning and data mining'
LONG_DESCRIPTION = open('README.rst').read()
with open('README.rst') as f:
LONG_DESCRIPTION = f.read()
MAINTAINER = 'Andreas Mueller'
MAINTAINER_EMAIL = 'amueller@ais.uni-bonn.de'
URL = 'http://scikit-learn.org'
Expand Down
43 changes: 26 additions & 17 deletions sklearn/datasets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,10 @@ def load_files(container_path, description=None, categories=None,
target = target[indices]

if load_content:
data = [open(filename, 'rb').read() for filename in filenames]
data = []
for filename in filenames:
with open(filename, 'rb') as f:
data.append(f.read())
if encoding is not None:
data = [d.decode(encoding, decode_error) for d in data]
return Bunch(data=data,
Expand Down Expand Up @@ -301,7 +304,8 @@ def load_digits(n_class=10):
module_path = dirname(__file__)
data = np.loadtxt(join(module_path, 'data', 'digits.csv.gz'),
delimiter=',')
descr = open(join(module_path, 'descr', 'digits.rst')).read()
with open(join(module_path, 'descr', 'digits.rst')) as f:
descr = f.read()
target = data[:, -1]
flat_data = data[:, :-1]
images = flat_data.view()
Expand Down Expand Up @@ -402,26 +406,31 @@ def load_boston():
(506, 13)
"""
module_path = dirname(__file__)
data_file = csv.reader(open(join(module_path, 'data',
'boston_house_prices.csv')))
fdescr = open(join(module_path, 'descr', 'boston_house_prices.rst'))
temp = next(data_file)
n_samples = int(temp[0])
n_features = int(temp[1])
data = np.empty((n_samples, n_features))
target = np.empty((n_samples,))
temp = next(data_file) # names of features
feature_names = np.array(temp)

for i, d in enumerate(data_file):
data[i] = np.asarray(d[:-1], dtype=np.float)
target[i] = np.asarray(d[-1], dtype=np.float)

fdescr_name = join(module_path, 'descr', 'boston_house_prices.rst')
with open(fdescr_name) as f:
descr_text = f.read()

data_file_name = join(module_path, 'data', 'boston_house_prices.csv')
with open(data_file_name) as f:
data_file = csv.reader(f)
temp = next(data_file)
n_samples = int(temp[0])
n_features = int(temp[1])
data = np.empty((n_samples, n_features))
target = np.empty((n_samples,))
temp = next(data_file) # names of features
feature_names = np.array(temp)

for i, d in enumerate(data_file):
data[i] = np.asarray(d[:-1], dtype=np.float)
target[i] = np.asarray(d[-1], dtype=np.float)

return Bunch(data=data,
target=target,
# last column is target value
feature_names=feature_names[:-1],
DESCR=fdescr.read())
DESCR=descr_text)


def load_sample_images():
Expand Down
14 changes: 8 additions & 6 deletions sklearn/datasets/tests/test_svmlight_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,17 +113,19 @@ def test_load_compressed():

with NamedTemporaryFile(prefix="sklearn-test", suffix=".gz") as tmp:
tmp.close() # necessary under windows
shutil.copyfileobj(open(datafile, "rb"), gzip.open(tmp.name, "wb"))
with open(datafile, "rb") as f:
shutil.copyfileobj(f, gzip.open(tmp.name, "wb"))
Xgz, ygz = load_svmlight_file(tmp.name)
assert_array_equal(X.toarray(), Xgz.toarray())
assert_array_equal(y, ygz)
assert_array_equal(X.toarray(), Xgz.toarray())
assert_array_equal(y, ygz)

with NamedTemporaryFile(prefix="sklearn-test", suffix=".bz2") as tmp:
tmp.close() # necessary under windows
shutil.copyfileobj(open(datafile, "rb"), BZ2File(tmp.name, "wb"))
with open(datafile, "rb") as f:
shutil.copyfileobj(f, BZ2File(tmp.name, "wb"))
Xbz, ybz = load_svmlight_file(tmp.name)
assert_array_equal(X.toarray(), Xbz.toarray())
assert_array_equal(y, ybz)
assert_array_equal(X.toarray(), Xbz.toarray())
assert_array_equal(y, ybz)


@raises(ValueError)
Expand Down
3 changes: 2 additions & 1 deletion sklearn/datasets/twenty_newsgroups.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,8 @@ def fetch_20newsgroups(data_home=None, subset='train', categories=None,
cache = None
if os.path.exists(cache_path):
try:
compressed_content = open(cache_path, 'rb').read()
with open(cache_path, 'rb') as f:
compressed_content = f.read()
uncompressed_content = codecs.decode(
compressed_content, 'zlib_codec')
cache = pickle.loads(uncompressed_content)
Expand Down
3 changes: 2 additions & 1 deletion sklearn/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,8 @@ def test_configure():
# Blas/Atlas development headers
warnings.simplefilter('ignore', UserWarning)
if PY3:
exec(open('setup.py').read(), dict(__name__='__main__'))
with open('setup.py') as f:
exec(f.read(), dict(__name__='__main__'))
else:
execfile('setup.py', dict(__name__='__main__'))
finally:
Expand Down

0 comments on commit 405df05

Please sign in to comment.